Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
      2 
      3 define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
        ; Splat byte element 5 of %a into all 32 lanes (every index in the
        ; shuffle mask below is 5).
        ; Expected assembly: a 128-bit vpshufb builds the splat in xmm0, then
        ; vinsertf128 $1 copies xmm0 into the upper 128 bits of ymm0.
      4 ; CHECK-LABEL: funcA:
      5 ; CHECK:       ## BB#0: ## %entry
      6 ; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
      7 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
      8 ; CHECK-NEXT:    retq
      9 entry:
     10   %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
     11   ret <32 x i8> %shuffle
     12 }
     13 
     14 define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
        ; Splat 16-bit element 5 of %a into all 16 lanes.
        ; Expected assembly: the vpshufb byte mask repeats 10,11, which are the
        ; two bytes of 16-bit element 5 (5 * 2 = 10); vinsertf128 $1 then
        ; copies xmm0 into the upper 128 bits of ymm0.
     15 ; CHECK-LABEL: funcB:
     16 ; CHECK:       ## BB#0: ## %entry
     17 ; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[10,11,10,11,10,11,10,11,10,11,10,11,10,11,10,11]
     18 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     19 ; CHECK-NEXT:    retq
     20 entry:
     21   %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
     22   ret <16 x i16> %shuffle
     23 }
     24 
     25 define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
        ; Build a <4 x i64> whose four lanes all hold the scalar %q, using a
        ; chain of insertelement instructions (lanes 0 through 3).
        ; Expected assembly: vmovq moves %rdi into xmm0, vmovddup duplicates
        ; the low 64 bits within xmm0, and vinsertf128 $1 copies xmm0 into the
        ; upper 128 bits of ymm0.
     26 ; CHECK-LABEL: funcC:
     27 ; CHECK:       ## BB#0: ## %entry
     28 ; CHECK-NEXT:    vmovq %rdi, %xmm0
     29 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     30 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     31 ; CHECK-NEXT:    retq
     32 entry:
     33   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
     34   %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
     35   %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
     36   %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
     37   ret <4 x i64> %vecinit6.i
     38 }
     39 
     40 define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
        ; Build a <4 x double> whose four lanes all hold the scalar %q, using a
        ; chain of insertelement instructions (lanes 0 through 3).
        ; Expected assembly: vmovddup operates directly on xmm0 (no preceding
        ; move is expected), then vinsertf128 $1 copies xmm0 into the upper
        ; 128 bits of ymm0.
     41 ; CHECK-LABEL: funcD:
     42 ; CHECK:       ## BB#0: ## %entry
     43 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     44 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     45 ; CHECK-NEXT:    retq
     46 entry:
     47   %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
     48   %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
     49   %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
     50   %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
     51   ret <4 x double> %vecinit6.i
     52 }
     53 
     54 ; Test this turns into a broadcast:
     55 ;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
     56 ;
     57 define <8 x float> @funcE() nounwind {
        ; Load one 32-bit value from a stack array and insert it into lanes 6
        ; and 7 of an otherwise-undef <8 x i32>, which is then bitcast to
        ; <8 x float>. The expected assembly performs the load and the inserts
        ; as a single vbroadcastss from a stack slot into ymm0.
        ; Both conditional branches test undef, and the phi in the exit block
        ; merges the loaded vector with undef.
     58 ; CHECK-LABEL: funcE:
     59 ; CHECK:       ## BB#0: ## %for_exit499
     60 ; CHECK-NEXT:    xorl %eax, %eax
     61 ; CHECK-NEXT:    ## implicit-def: %YMM0
     62 ; CHECK-NEXT:    testb %al, %al
     63 ; CHECK-NEXT:    jne LBB4_2
     64 ; CHECK-NEXT:  ## BB#1: ## %load.i1247
     65 ; CHECK-NEXT:    pushq %rbp
     66 ; CHECK-NEXT:    movq %rsp, %rbp
     67 ; CHECK-NEXT:    andq $-32, %rsp
     68 ; CHECK-NEXT:    subq $1312, %rsp ## imm = 0x520
     69 ; CHECK-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
     70 ; CHECK-NEXT:    movq %rbp, %rsp
     71 ; CHECK-NEXT:    popq %rbp
     72 ; CHECK-NEXT:  LBB4_2: ## %__load_and_broadcast_32.exit1249
     73 ; CHECK-NEXT:    retq
     74 allocas:
          ; 18 x 18 floats (18 * 18 * 4 = 1296 bytes), 32-byte aligned.
     75   %udx495 = alloca [18 x [18 x float]], align 32
     76   br label %for_test505.preheader
     77 
     78 for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
     79   br i1 undef, label %for_exit499, label %for_test505.preheader
     80 
     81 for_exit499:                                      ; preds = %for_test505.preheader
     82   br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
     83 
     84 load.i1247:                                       ; preds = %for_exit499
          ; Address of element [1][1] of the array, reinterpreted as i32* so the
          ; value is loaded as an integer.
     85   %ptr1227 = getelementptr [18 x [18 x float]], [18 x [18 x float]]* %udx495, i64 0, i64 1, i64 1
     86   %ptr.i1237 = bitcast float* %ptr1227 to i32*
     87   %val.i1238 = load i32, i32* %ptr.i1237, align 4
          ; Only lanes 6 and 7 are defined; lanes 0-5 stay undef.
     88   %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
     89   %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
     90   %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
     91   br label %__load_and_broadcast_32.exit1249
     92 
     93 __load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
     94   %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
     95   ret <8 x float> %load_broadcast12281250
     96 }
     97 
     98 define <8 x float> @funcF(i32 %val) nounwind {
        ; Insert the scalar %val into lanes 6 and 7 of an otherwise-undef
        ; <8 x i32> and return it bitcast to <8 x float>.
        ; Expected assembly: vmovd moves %edi into xmm0, the [0,1,0,0] permute
        ; puts the value in elements 0, 2 and 3 of xmm0, and vinsertf128 $1
        ; copies xmm0 into the upper 128 bits of ymm0, so elements 2 and 3 of
        ; xmm0 become lanes 6 and 7 of the result.
     99 ; CHECK-LABEL: funcF:
    100 ; CHECK:       ## BB#0:
    101 ; CHECK-NEXT:    vmovd %edi, %xmm0
    102 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,0]
    103 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    104 ; CHECK-NEXT:    retq
    105   %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
    106   %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
    107   %tmp = bitcast <8 x i32> %ret7 to <8 x float>
    108   ret <8 x float> %tmp
    109 }
    110 
    111 define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
        ; Splat float element 0 of %a into all 8 lanes.
        ; Expected assembly: vpermilps splats element 0 within xmm0, then
        ; vinsertf128 $1 copies xmm0 into the upper 128 bits of ymm0.
    112 ; CHECK-LABEL: funcG:
    113 ; CHECK:       ## BB#0: ## %entry
    114 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    115 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    116 ; CHECK-NEXT:    retq
    117 entry:
    118   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    119   ret <8 x float> %shuffle
    120 }
    121 
    122 define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
        ; Splat float element 5 of %a into all 8 lanes.
        ; Element 5 sits in the upper 128-bit half of the input, so the
        ; expected assembly first pulls that half down with vextractf128 $1,
        ; splats its element 1 (5 - 4 = 1) with vpermilps, and then copies
        ; xmm0 into the upper 128 bits of ymm0 with vinsertf128 $1.
    123 ; CHECK-LABEL: funcH:
    124 ; CHECK:       ## BB#0: ## %entry
    125 ; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
    126 ; CHECK-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    127 ; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    128 ; CHECK-NEXT:    retq
    129 entry:
    130   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    131   ret <8 x float> %shuffle
    132 }
    133 
    134 define <2 x double> @splat_load_2f64_11(<2 x double>* %ptr) {
        ; Load a <2 x double> from %ptr and splat its element 1 into both lanes.
        ; Expected assembly: the whole vector is loaded with vmovaps and the
        ; high element is then duplicated with vmovhlps; unlike the other
        ; splat_load tests in this file, no broadcast-from-memory is expected.
    135 ; CHECK-LABEL: splat_load_2f64_11:
    136 ; CHECK:       ## BB#0:
    137 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0
    138 ; CHECK-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
    139 ; CHECK-NEXT:    retq
    140   %x = load <2 x double>, <2 x double>* %ptr
    141   %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
    142   ret <2 x double> %x1
    143 }
    144 
    145 define <4 x double> @splat_load_4f64_2222(<4 x double>* %ptr) {
        ; Load a <4 x double> from %ptr and splat its element 2 into all lanes.
        ; Expected assembly: load and splat become one vbroadcastsd from
        ; offset 16 (element 2 * 8 bytes per double).
    146 ; CHECK-LABEL: splat_load_4f64_2222:
    147 ; CHECK:       ## BB#0:
    148 ; CHECK-NEXT:    vbroadcastsd 16(%rdi), %ymm0
    149 ; CHECK-NEXT:    retq
    150   %x = load <4 x double>, <4 x double>* %ptr
    151   %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
    152   ret <4 x double> %x1
    153 }
    154 
    155 define <4 x float> @splat_load_4f32_0000(<4 x float>* %ptr) {
        ; Load a <4 x float> from %ptr and splat its element 0 into all lanes.
        ; Expected assembly: load and splat become one 128-bit vbroadcastss
        ; from offset 0 into xmm0.
    156 ; CHECK-LABEL: splat_load_4f32_0000:
    157 ; CHECK:       ## BB#0:
    158 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
    159 ; CHECK-NEXT:    retq
    160   %x = load <4 x float>, <4 x float>* %ptr
    161   %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    162   ret <4 x float> %x1
    163 }
    164 
    165 define <8 x float> @splat_load_8f32_77777777(<8 x float>* %ptr) {
        ; Load an <8 x float> from %ptr and splat its element 7 into all lanes.
        ; Expected assembly: load and splat become one vbroadcastss from
        ; offset 28 (element 7 * 4 bytes per float) into ymm0.
    166 ; CHECK-LABEL: splat_load_8f32_77777777:
    167 ; CHECK:       ## BB#0:
    168 ; CHECK-NEXT:    vbroadcastss 28(%rdi), %ymm0
    169 ; CHECK-NEXT:    retq
    170   %x = load <8 x float>, <8 x float>* %ptr
    171   %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
    172   ret <8 x float> %x1
    173 }
    174