Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
      3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
      4 
      5 ; Verify that the scalar load is folded into the math instruction's memory operand.
      6 ; This pattern is what the simplest intrinsics usage generates, e.g.:
      7 ;  _mm_add_ss(a, _mm_load_ss(b));
      8 
      9 define <4 x float> @addss(<4 x float> %va, float* %pb) { ; lane 0 of %va plus the float at %pb; lanes 1-3 of %va are returned unchanged
     10 ; SSE-LABEL: addss:
     11 ; SSE:       # BB#0:
     12 ; SSE-NEXT:    addss (%rdi), %xmm0
     13 ; SSE-NEXT:    retq
     14 ;
     15 ; AVX-LABEL: addss:
     16 ; AVX:       # BB#0:
     17 ; AVX-NEXT:    vaddss (%rdi), %xmm0, %xmm0
     18 ; AVX-NEXT:    retq
     19     %a = extractelement <4 x float> %va, i32 0 ; scalar operand: lane 0 of %va
     20     %b = load float, float* %pb ; scalar load the checks expect to appear as the (%rdi) memory operand
     21     %r = fadd float %a, %b ; the scalar add under test
     22     %vr = insertelement <4 x float> %va, float %r, i32 0 ; write the sum back to lane 0, keeping the other lanes of %va
     23     ret <4 x float> %vr
     24 }
     25 
     26 define <2 x double> @addsd(<2 x double> %va, double* %pb) { ; lane 0 of %va plus the double at %pb; lane 1 of %va is returned unchanged
     27 ; SSE-LABEL: addsd:
     28 ; SSE:       # BB#0:
     29 ; SSE-NEXT:    addsd (%rdi), %xmm0
     30 ; SSE-NEXT:    retq
     31 ;
     32 ; AVX-LABEL: addsd:
     33 ; AVX:       # BB#0:
     34 ; AVX-NEXT:    vaddsd (%rdi), %xmm0, %xmm0
     35 ; AVX-NEXT:    retq
     36     %a = extractelement <2 x double> %va, i32 0 ; scalar operand: lane 0 of %va
     37     %b = load double, double* %pb ; scalar load the checks expect to appear as the (%rdi) memory operand
     38     %r = fadd double %a, %b ; the scalar add under test
     39     %vr = insertelement <2 x double> %va, double %r, i32 0 ; write the sum back to lane 0, keeping lane 1 of %va
     40     ret <2 x double> %vr
     41 }
     42 
     43 define <4 x float> @subss(<4 x float> %va, float* %pb) { ; lane 0 of %va minus the float at %pb; lanes 1-3 of %va are returned unchanged
     44 ; SSE-LABEL: subss:
     45 ; SSE:       # BB#0:
     46 ; SSE-NEXT:    subss (%rdi), %xmm0
     47 ; SSE-NEXT:    retq
     48 ;
     49 ; AVX-LABEL: subss:
     50 ; AVX:       # BB#0:
     51 ; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
     52 ; AVX-NEXT:    retq
     53     %a = extractelement <4 x float> %va, i32 0 ; minuend: lane 0 of %va
     54     %b = load float, float* %pb ; subtrahend: scalar load the checks expect to appear as the (%rdi) memory operand
     55     %r = fsub float %a, %b ; the scalar subtract under test (vector lane minus loaded value)
     56     %vr = insertelement <4 x float> %va, float %r, i32 0 ; write the difference back to lane 0, keeping the other lanes of %va
     57     ret <4 x float> %vr
     58 }
     59 
     60 define <2 x double> @subsd(<2 x double> %va, double* %pb) { ; lane 0 of %va minus the double at %pb; lane 1 of %va is returned unchanged
     61 ; SSE-LABEL: subsd:
     62 ; SSE:       # BB#0:
     63 ; SSE-NEXT:    subsd (%rdi), %xmm0
     64 ; SSE-NEXT:    retq
     65 ;
     66 ; AVX-LABEL: subsd:
     67 ; AVX:       # BB#0:
     68 ; AVX-NEXT:    vsubsd (%rdi), %xmm0, %xmm0
     69 ; AVX-NEXT:    retq
     70     %a = extractelement <2 x double> %va, i32 0 ; minuend: lane 0 of %va
     71     %b = load double, double* %pb ; subtrahend: scalar load the checks expect to appear as the (%rdi) memory operand
     72     %r = fsub double %a, %b ; the scalar subtract under test (vector lane minus loaded value)
     73     %vr = insertelement <2 x double> %va, double %r, i32 0 ; write the difference back to lane 0, keeping lane 1 of %va
     74     ret <2 x double> %vr
     75 }
     76 
     77 define <4 x float> @mulss(<4 x float> %va, float* %pb) { ; lane 0 of %va times the float at %pb; lanes 1-3 of %va are returned unchanged
     78 ; SSE-LABEL: mulss:
     79 ; SSE:       # BB#0:
     80 ; SSE-NEXT:    mulss (%rdi), %xmm0
     81 ; SSE-NEXT:    retq
     82 ;
     83 ; AVX-LABEL: mulss:
     84 ; AVX:       # BB#0:
     85 ; AVX-NEXT:    vmulss (%rdi), %xmm0, %xmm0
     86 ; AVX-NEXT:    retq
     87     %a = extractelement <4 x float> %va, i32 0 ; scalar operand: lane 0 of %va
     88     %b = load float, float* %pb ; scalar load the checks expect to appear as the (%rdi) memory operand
     89     %r = fmul float %a, %b ; the scalar multiply under test
     90     %vr = insertelement <4 x float> %va, float %r, i32 0 ; write the product back to lane 0, keeping the other lanes of %va
     91     ret <4 x float> %vr
     92 }
     93 
     94 define <2 x double> @mulsd(<2 x double> %va, double* %pb) { ; lane 0 of %va times the double at %pb; lane 1 of %va is returned unchanged
     95 ; SSE-LABEL: mulsd:
     96 ; SSE:       # BB#0:
     97 ; SSE-NEXT:    mulsd (%rdi), %xmm0
     98 ; SSE-NEXT:    retq
     99 ;
    100 ; AVX-LABEL: mulsd:
    101 ; AVX:       # BB#0:
    102 ; AVX-NEXT:    vmulsd (%rdi), %xmm0, %xmm0
    103 ; AVX-NEXT:    retq
    104     %a = extractelement <2 x double> %va, i32 0 ; scalar operand: lane 0 of %va
    105     %b = load double, double* %pb ; scalar load the checks expect to appear as the (%rdi) memory operand
    106     %r = fmul double %a, %b ; the scalar multiply under test
    107     %vr = insertelement <2 x double> %va, double %r, i32 0 ; write the product back to lane 0, keeping lane 1 of %va
    108     ret <2 x double> %vr
    109 }
    110 
    111 define <4 x float> @divss(<4 x float> %va, float* %pb) { ; lane 0 of %va divided by the float at %pb; lanes 1-3 of %va are returned unchanged
    112 ; SSE-LABEL: divss:
    113 ; SSE:       # BB#0:
    114 ; SSE-NEXT:    divss (%rdi), %xmm0
    115 ; SSE-NEXT:    retq
    116 ;
    117 ; AVX-LABEL: divss:
    118 ; AVX:       # BB#0:
    119 ; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
    120 ; AVX-NEXT:    retq
    121     %a = extractelement <4 x float> %va, i32 0 ; dividend: lane 0 of %va
    122     %b = load float, float* %pb ; divisor: scalar load the checks expect to appear as the (%rdi) memory operand
    123     %r = fdiv float %a, %b ; the scalar divide under test (vector lane divided by loaded value)
    124     %vr = insertelement <4 x float> %va, float %r, i32 0 ; write the quotient back to lane 0, keeping the other lanes of %va
    125     ret <4 x float> %vr
    126 }
    127 
    128 define <2 x double> @divsd(<2 x double> %va, double* %pb) { ; lane 0 of %va divided by the double at %pb; lane 1 of %va is returned unchanged
    129 ; SSE-LABEL: divsd:
    130 ; SSE:       # BB#0:
    131 ; SSE-NEXT:    divsd (%rdi), %xmm0
    132 ; SSE-NEXT:    retq
    133 ;
    134 ; AVX-LABEL: divsd:
    135 ; AVX:       # BB#0:
    136 ; AVX-NEXT:    vdivsd (%rdi), %xmm0, %xmm0
    137 ; AVX-NEXT:    retq
    138     %a = extractelement <2 x double> %va, i32 0 ; dividend: lane 0 of %va
    139     %b = load double, double* %pb ; divisor: scalar load the checks expect to appear as the (%rdi) memory operand
    140     %r = fdiv double %a, %b ; the scalar divide under test (vector lane divided by loaded value)
    141     %vr = insertelement <2 x double> %va, double %r, i32 0 ; write the quotient back to lane 0, keeping lane 1 of %va
    142     ret <2 x double> %vr
    143 }
    144