Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mcpu=x86-64 -mattr=-sse2 | FileCheck %s --check-prefix=SSE1
      2 
      ; Codegen tests for <4 x float> shufflevector lowering on x86-64 with SSE2
      ; disabled (-mattr=-sse2 on the llc command line above). Each function is
      ; followed by the exact assembly FileCheck expects; those expectation lines
      ; are matched literally, so they must stay in sync with llc's output.
      3 target triple = "x86_64-unknown-unknown"
      4 
      ; Single-input shuffle: every mask index (<0,0,0,1>) is below 4, so only %a
      ; is read and %b is unused. Expected lowering is one in-place shufps on xmm0.
      5 define <4 x float> @shuffle_v4f32_0001(<4 x float> %a, <4 x float> %b) {
      6 ; SSE1-LABEL: shuffle_v4f32_0001:
      7 ; SSE1:       # BB#0:
      8 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,1]
      9 ; SSE1-NEXT:    retq
     10   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
     11   ret <4 x float> %shuffle
     12 }
     ; Single-input shuffle of %a with mask <0,0,2,0> (%b is never selected).
     ; Expected lowering is one in-place shufps on xmm0.
     13 define <4 x float> @shuffle_v4f32_0020(<4 x float> %a, <4 x float> %b) {
     14 ; SSE1-LABEL: shuffle_v4f32_0020:
     15 ; SSE1:       # BB#0:
     16 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,0]
     17 ; SSE1-NEXT:    retq
     18   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
     19   ret <4 x float> %shuffle
     20 }
     ; Single-input shuffle of %a with mask <0,3,0,0> (%b is never selected).
     ; Expected lowering is one in-place shufps on xmm0.
     21 define <4 x float> @shuffle_v4f32_0300(<4 x float> %a, <4 x float> %b) {
     22 ; SSE1-LABEL: shuffle_v4f32_0300:
     23 ; SSE1:       # BB#0:
     24 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3,0,0]
     25 ; SSE1-NEXT:    retq
     26   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
     27   ret <4 x float> %shuffle
     28 }
     ; Single-input shuffle of %a with mask <1,0,0,0> (%b is never selected).
     ; Expected lowering is one in-place shufps on xmm0.
     29 define <4 x float> @shuffle_v4f32_1000(<4 x float> %a, <4 x float> %b) {
     30 ; SSE1-LABEL: shuffle_v4f32_1000:
     31 ; SSE1:       # BB#0:
     32 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0,0,0]
     33 ; SSE1-NEXT:    retq
     34   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
     35   ret <4 x float> %shuffle
     36 }
     ; Single-input shuffle of %a with mask <2,2,0,0> (%b is never selected).
     ; Expected lowering is one in-place shufps on xmm0.
     37 define <4 x float> @shuffle_v4f32_2200(<4 x float> %a, <4 x float> %b) {
     38 ; SSE1-LABEL: shuffle_v4f32_2200:
     39 ; SSE1:       # BB#0:
     40 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,2,0,0]
     41 ; SSE1-NEXT:    retq
     42   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
     43   ret <4 x float> %shuffle
     44 }
     ; Single-input shuffle of %a with mask <3,3,3,0> (%b is never selected).
     ; Expected lowering is one in-place shufps on xmm0.
     45 define <4 x float> @shuffle_v4f32_3330(<4 x float> %a, <4 x float> %b) {
     46 ; SSE1-LABEL: shuffle_v4f32_3330:
     47 ; SSE1:       # BB#0:
     48 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,0]
     49 ; SSE1-NEXT:    retq
     50   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
     51   ret <4 x float> %shuffle
     52 }
     ; Full lane reversal of %a (mask <3,2,1,0>); %b is never selected.
     ; Expected lowering is one in-place shufps on xmm0.
     53 define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
     54 ; SSE1-LABEL: shuffle_v4f32_3210:
     55 ; SSE1:       # BB#0:
     56 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
     57 ; SSE1-NEXT:    retq
     58   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
     59   ret <4 x float> %shuffle
     60 }
     ; Duplicates each of the two low lanes of %a (mask <0,0,1,1>); %b is never
     ; selected. Expected lowering is unpcklps rather than shufps.
     61 define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
     62 ; SSE1-LABEL: shuffle_v4f32_0011:
     63 ; SSE1:       # BB#0:
     64 ; SSE1-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
     65 ; SSE1-NEXT:    retq
     66   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
     67   ret <4 x float> %shuffle
     68 }
     ; Duplicates each of the two high lanes of %a (mask <2,2,3,3>); %b is never
     ; selected. Expected lowering is unpckhps rather than shufps.
     69 define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
     70 ; SSE1-LABEL: shuffle_v4f32_2233:
     71 ; SSE1:       # BB#0:
     72 ; SSE1-NEXT:    unpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
     73 ; SSE1-NEXT:    retq
     74   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
     75   ret <4 x float> %shuffle
     76 }
     ; Duplicates the even lanes of %a (mask <0,0,2,2>); %b is never selected.
     ; Expected lowering is one in-place shufps on xmm0.
     77 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
     78 ; SSE1-LABEL: shuffle_v4f32_0022:
     79 ; SSE1:       # BB#0:
     80 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,2,2]
     81 ; SSE1-NEXT:    retq
     82   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
     83   ret <4 x float> %shuffle
     84 }
     ; Duplicates the odd lanes of %a (mask <1,1,3,3>); %b is never selected.
     ; Expected lowering is one in-place shufps on xmm0.
     85 define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
     86 ; SSE1-LABEL: shuffle_v4f32_1133:
     87 ; SSE1:       # BB#0:
     88 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
     89 ; SSE1-NEXT:    retq
     90   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
     91   ret <4 x float> %shuffle
     92 }
     93 
     ; Blend with a zero vector: the first shuffle operand is zeroinitializer and
     ; the second is %a, so mask <4,1,2,3> yields lane 0 = %a[0] and lanes 1-3 = 0.
     ; Expected: zero xmm1 with xorps, merge xmm0[0] into it with movss, then copy
     ; the result back to xmm0.
     94 define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
     95 ; SSE1-LABEL: shuffle_v4f32_4zzz:
     96 ; SSE1:       # BB#0:
     97 ; SSE1-NEXT:    xorps %xmm1, %xmm1
     98 ; SSE1-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
     99 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    100 ; SSE1-NEXT:    retq
    101   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
    102   ret <4 x float> %shuffle
    103 }
    104 
    ; Blend with a zero vector (first operand zeroinitializer, second %a): mask
    ; <2,4,3,0> yields lane 1 = %a[0] and zero in lanes 0, 2 and 3.
    ; Expected: xorps to zero xmm1 followed by two shufps that build the result
    ; directly in xmm0.
    105 define <4 x float> @shuffle_v4f32_z4zz(<4 x float> %a) {
    106 ; SSE1-LABEL: shuffle_v4f32_z4zz:
    107 ; SSE1:       # BB#0:
    108 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    109 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    110 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    111 ; SSE1-NEXT:    retq
    112   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
    113   ret <4 x float> %shuffle
    114 }
    115 
    ; Blend with a zero vector (first operand zeroinitializer, second %a): mask
    ; <0,0,4,0> yields lane 2 = %a[0] and zero in lanes 0, 1 and 3.
    ; Expected: xorps, two shufps (the second accumulates into xmm1), then a
    ; movaps to move the result into xmm0.
    116 define <4 x float> @shuffle_v4f32_zz4z(<4 x float> %a) {
    117 ; SSE1-LABEL: shuffle_v4f32_zz4z:
    118 ; SSE1:       # BB#0:
    119 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    120 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
    121 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
    122 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    123 ; SSE1-NEXT:    retq
    124   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
    125   ret <4 x float> %shuffle
    126 }
    127 
    ; Blend with a zero vector where two lanes are undef: mask <0,undef,undef,4>
    ; yields lane 0 = 0, lane 3 = %a[0], and leaves lanes 1-2 unconstrained.
    ; Expected: xorps plus a single shufps into xmm1, then movaps back to xmm0.
    128 define <4 x float> @shuffle_v4f32_zuu4(<4 x float> %a) {
    129 ; SSE1-LABEL: shuffle_v4f32_zuu4:
    130 ; SSE1:       # BB#0:
    131 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    132 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    133 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    134 ; SSE1-NEXT:    retq
    135   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
    136   ret <4 x float> %shuffle
    137 }
    138 
    ; Blend with a zero vector (first operand zeroinitializer, second %a): mask
    ; <0,1,2,7> yields zero in lanes 0-2 and lane 3 = %a[3].
    ; Expected: xorps, two shufps (the second accumulates into xmm1), then a
    ; movaps to move the result into xmm0.
    139 define <4 x float> @shuffle_v4f32_zzz7(<4 x float> %a) {
    140 ; SSE1-LABEL: shuffle_v4f32_zzz7:
    141 ; SSE1:       # BB#0:
    142 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    143 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm1[2,0]
    144 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
    145 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    146 ; SSE1-NEXT:    retq
    147   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
    148   ret <4 x float> %shuffle
    149 }
    150 
    ; Blend with a zero vector (first operand zeroinitializer, second %a): mask
    ; <0,6,2,3> yields lane 1 = %a[2] and zero in lanes 0, 2 and 3.
    ; Expected: xorps to zero xmm1 followed by two shufps that build the result
    ; directly in xmm0.
    151 define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
    152 ; SSE1-LABEL: shuffle_v4f32_z6zz:
    153 ; SSE1:       # BB#0:
    154 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    155 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[0,0]
    156 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
    157 ; SSE1-NEXT:    retq
    158   %shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 0, i32 6, i32 2, i32 3>
    159   ret <4 x float> %shuffle
    160 }
    161 
    ; Scalar-to-vector with zeroed upper lanes: the float argument %a is inserted
    ; into lane 0 of an undef vector, then mask <0,5,6,7> takes lanes 1-3 from a
    ; zero vector. Expected: xorps to zero xmm1, movss to merge xmm0[0] into it,
    ; then movaps back to xmm0.
    162 define <4 x float> @insert_reg_and_zero_v4f32(float %a) {
    163 ; SSE1-LABEL: insert_reg_and_zero_v4f32:
    164 ; SSE1:       # BB#0:
    165 ; SSE1-NEXT:    xorps %xmm1, %xmm1
    166 ; SSE1-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
    167 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    168 ; SSE1-NEXT:    retq
    169   %v = insertelement <4 x float> undef, float %a, i32 0
    170   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    171   ret <4 x float> %shuffle
    172 }
    173 
    ; Same pattern as a scalar insert into lane 0 with lanes 1-3 zeroed (mask
    ; <0,5,6,7> against zeroinitializer), but the scalar is loaded from %ptr.
    ; Expected: the load and the zeroing fold into a single movss from memory.
    173 define <4 x float> @insert_mem_and_zero_v4f32(float* %ptr) {
    174 ; SSE1-LABEL: insert_mem_and_zero_v4f32:
    175 ; SSE1:       # BB#0:
    176 ; SSE1-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    177 ; SSE1-NEXT:    retq
    178   %a = load float, float* %ptr
    179   %v = insertelement <4 x float> undef, float %a, i32 0
    180   %shuffle = shufflevector <4 x float> %v, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
    181   ret <4 x float> %shuffle
    182 }
    184 
    ; Replaces the low half of %b with a <2 x float> loaded from %ptr: the loaded
    ; value is widened to four lanes (upper two undef), then mask <0,1,6,7> takes
    ; lanes 0-1 from it and lanes 2-3 from %b.
    ; Expected output: the 64-bit value is loaded into %rax, split into two
    ; 32-bit stores at negative offsets from %rsp, reloaded with two movss,
    ; re-interleaved with unpcklps, and then combined with xmm0 via shufps; the
    ; result is built in xmm1 and copied to xmm0.
    184 define <4 x float> @insert_mem_lo_v4f32(<2 x float>* %ptr, <4 x float> %b) {
    185 ; SSE1-LABEL: insert_mem_lo_v4f32:
    186 ; SSE1:       # BB#0:
    187 ; SSE1-NEXT:    movq (%rdi), %rax
    188 ; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
    189 ; SSE1-NEXT:    shrq $32, %rax
    190 ; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
    191 ; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    192 ; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    193 ; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
    194 ; SSE1-NEXT:    xorps %xmm2, %xmm2
    195 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
    196 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
    197 ; SSE1-NEXT:    movaps %xmm1, %xmm0
    198 ; SSE1-NEXT:    retq
    199   %a = load <2 x float>, <2 x float>* %ptr
    200   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    201   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    202   ret <4 x float> %shuffle
    203 }
    205 
    ; Replaces the high half of %b with a <2 x float> loaded from %ptr: the loaded
    ; value is widened to four lanes (upper two undef), then mask <4,5,0,1> takes
    ; lanes 0-1 from %b and lanes 2-3 from the loaded pair.
    ; Expected output: the 64-bit value is loaded into %rax, split into two
    ; 32-bit stores at negative offsets from %rsp, reloaded with two movss,
    ; re-interleaved with unpcklps, and finally merged into xmm0 with shufps
    ; (no trailing register copy is expected here).
    205 define <4 x float> @insert_mem_hi_v4f32(<2 x float>* %ptr, <4 x float> %b) {
    206 ; SSE1-LABEL: insert_mem_hi_v4f32:
    207 ; SSE1:       # BB#0:
    208 ; SSE1-NEXT:    movq (%rdi), %rax
    209 ; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
    210 ; SSE1-NEXT:    shrq $32, %rax
    211 ; SSE1-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
    212 ; SSE1-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    213 ; SSE1-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    214 ; SSE1-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
    215 ; SSE1-NEXT:    xorps %xmm2, %xmm2
    216 ; SSE1-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
    217 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
    218 ; SSE1-NEXT:    retq
    219   %a = load <2 x float>, <2 x float>* %ptr
    220   %v = shufflevector <2 x float> %a, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
    221   %shuffle = shufflevector <4 x float> %v, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    222   ret <4 x float> %shuffle
    223 }
    225 
    ; Full lane reversal (mask <3,2,1,0>) of a <4 x float> loaded from %ptr; the
    ; second shuffle operand is undef. Expected: a movaps load from (%rdi)
    ; followed by one in-place shufps on xmm0.
    225 define <4 x float> @shuffle_mem_v4f32_3210(<4 x float>* %ptr) {
    226 ; SSE1-LABEL: shuffle_mem_v4f32_3210:
    227 ; SSE1:       # BB#0:
    228 ; SSE1-NEXT:    movaps (%rdi), %xmm0
    229 ; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    230 ; SSE1-NEXT:    retq
    231   %a = load <4 x float>, <4 x float>* %ptr
    232   %shuffle = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    233   ret <4 x float> %shuffle
    234 }
    236