; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mattr=-sse4.1 -mcpu=corei7 < %s | FileCheck -check-prefix=CHECK -check-prefix=SSE2 %s
; RUN: llc -mtriple=x86_64-pc-linux -mcpu=corei7-avx < %s | FileCheck -check-prefix=CHECK -check-prefix=AVX %s

; Ensure that the backend no longer emits unnecessary vector insert
; instructions immediately after SSE scalar fp instructions
; like addss or mulss.
;
; Check prefixes used below:
;   CHECK - applied in all three RUN configurations.
;   SSE2  - applied in the two corei7 runs (with and without SSE4.1).
;   AVX   - applied in the corei7-avx run.
;
; Every test extracts element 0 from both vector arguments, performs a scalar
; floating-point operation on the two elements, and inserts the result back
; into element 0 of one of the input vectors. The checks require a single
; scalar arithmetic instruction and forbid a following movss/movsd.


;-----------------------------------------------------------------------
; Group 1: result is inserted back into %a; operation is (%a[0] op %b[0]).
;-----------------------------------------------------------------------

define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %3 = insertelement <4 x float> %a, float %add, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_add_ss:
; SSE2: addss   %xmm1, %xmm0
; AVX: vaddss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %a, float %sub, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_sub_ss:
; SSE2: subss   %xmm1, %xmm0
; AVX: vsubss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %3 = insertelement <4 x float> %a, float %mul, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_mul_ss:
; SSE2: mulss   %xmm1, %xmm0
; AVX: vmulss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %a, float %div, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_div_ss:
; SSE2: divss   %xmm1, %xmm0
; AVX: vdivss   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %add = fadd double %2, %1
  %3 = insertelement <2 x double> %a, double %add, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_add_sd:
; SSE2: addsd   %xmm1, %xmm0
; AVX: vaddsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %a, double %sub, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_sub_sd:
; SSE2: subsd   %xmm1, %xmm0
; AVX: vsubsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %mul = fmul double %2, %1
  %3 = insertelement <2 x double> %a, double %mul, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_mul_sd:
; SSE2: mulsd   %xmm1, %xmm0
; AVX: vmulsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %b, i32 0
  %2 = extractelement <2 x double> %a, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %a, double %div, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test_div_sd:
; SSE2: divsd   %xmm1, %xmm0
; AVX: vdivsd   %xmm1, %xmm0, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


;-----------------------------------------------------------------------
; Group 2: result is inserted into %b instead of %a.
; add/mul compute (%a[0] op %b[0]); sub/div compute (%b[0] op %a[0]).
; The SSE2 checks expect the scalar op to use %xmm1 as its destination;
; the AVX checks expect the three-operand form writing %xmm0.
;-----------------------------------------------------------------------

define <4 x float> @test2_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %add = fadd float %1, %2
  %3 = insertelement <4 x float> %b, float %add, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_add_ss:
; SSE2: addss   %xmm0, %xmm1
; AVX: vaddss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %sub = fsub float %2, %1
  %3 = insertelement <4 x float> %b, float %sub, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_sub_ss:
; SSE2: subss   %xmm0, %xmm1
; AVX: vsubss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %mul = fmul float %1, %2
  %3 = insertelement <4 x float> %b, float %mul, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_mul_ss:
; SSE2: mulss   %xmm0, %xmm1
; AVX: vmulss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test2_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %a, i32 0
  %2 = extractelement <4 x float> %b, i32 0
  %div = fdiv float %2, %1
  %3 = insertelement <4 x float> %b, float %div, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test2_div_ss:
; SSE2: divss   %xmm0, %xmm1
; AVX: vdivss   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movss
; CHECK: ret


define <2 x double> @test2_add_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %add = fadd double %1, %2
  %3 = insertelement <2 x double> %b, double %add, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_add_sd:
; SSE2: addsd   %xmm0, %xmm1
; AVX: vaddsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_sub_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %sub = fsub double %2, %1
  %3 = insertelement <2 x double> %b, double %sub, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_sub_sd:
; SSE2: subsd   %xmm0, %xmm1
; AVX: vsubsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_mul_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %mul = fmul double %1, %2
  %3 = insertelement <2 x double> %b, double %mul, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_mul_sd:
; SSE2: mulsd   %xmm0, %xmm1
; AVX: vmulsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


define <2 x double> @test2_div_sd(<2 x double> %a, <2 x double> %b) {
  %1 = extractelement <2 x double> %a, i32 0
  %2 = extractelement <2 x double> %b, i32 0
  %div = fdiv double %2, %1
  %3 = insertelement <2 x double> %b, double %div, i32 0
  ret <2 x double> %3
}

; CHECK-LABEL: test2_div_sd:
; SSE2: divsd   %xmm0, %xmm1
; AVX: vdivsd   %xmm0, %xmm1, %xmm0
; CHECK-NOT: movsd
; CHECK: ret


;-----------------------------------------------------------------------
; Group 3: two chained scalar operations, %a[0] op (%a[0] op %b[0]), with
; the final result inserted into %a. The un-prefixed mnemonic patterns
; (e.g. "addss") are substrings of the AVX spellings (e.g. "vaddss"), so the
; same CHECK lines serve all RUN configurations.
;-----------------------------------------------------------------------

define <4 x float> @test_multiple_add_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %add = fadd float %2, %1
  %add2 = fadd float %2, %add
  %3 = insertelement <4 x float> %a, float %add2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_add_ss:
; CHECK: addss
; CHECK: addss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_sub_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %sub = fsub float %2, %1
  %sub2 = fsub float %2, %sub
  %3 = insertelement <4 x float> %a, float %sub2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_sub_ss:
; CHECK: subss
; CHECK: subss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_mul_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %mul = fmul float %2, %1
  %mul2 = fmul float %2, %mul
  %3 = insertelement <4 x float> %a, float %mul2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_mul_ss:
; CHECK: mulss
; CHECK: mulss
; CHECK-NOT: movss
; CHECK: ret


define <4 x float> @test_multiple_div_ss(<4 x float> %a, <4 x float> %b) {
  %1 = extractelement <4 x float> %b, i32 0
  %2 = extractelement <4 x float> %a, i32 0
  %div = fdiv float %2, %1
  %div2 = fdiv float %2, %div
  %3 = insertelement <4 x float> %a, float %div2, i32 0
  ret <4 x float> %3
}

; CHECK-LABEL: test_multiple_div_ss:
; CHECK: divss
; CHECK: divss
; CHECK-NOT: movss
; CHECK: ret