; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX

;
; NOTE: this was generated by utils/update_llc_test_checks.py, but we can't check NaN types (PR30443),
; so the NaN constant comments have been edited out by hand
;

; copysign(x, c1) -> fabs(x) iff ispos(c1)
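; With an all-positive sign operand the sign bit of %x just needs clearing,
; so both lowerings should reduce to a single AND with the per-lane fabs mask
; 0x7FFFFFFF. E.g. for a lane holding -1.5 (0xBFC00000):
; 0xBFC00000 & 0x7FFFFFFF = 0x3FC00000 = 1.5.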
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
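; With an all-negative sign operand the sign bit must be set instead, so the
; single AND above becomes a single OR with the per-lane sign mask 0x80000000
; (the bit pattern of -0.0).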
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
  ret <4 x float> %1
}

define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    orps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
  %2 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %1
  %3 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %2)
  ret <4 x float> %3
}

; copysign(fabs(x), y) -> copysign(x, y)
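; copysign replaces the sign of its magnitude operand outright, so the inner
; fabs is dead and only the generic two-mask lowering should survive:
; (%x & 0x7FFFFFFF) | (%y & 0x80000000).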
define <4 x float> @combine_vec_fcopysign_fabs_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fabs_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fabs_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(fneg(x), y) -> copysign(x, y)
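; fneg only flips the sign bit of %x, which copysign discards anyway, so the
; negation folds away and the same two-mask sequence as above is expected.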
define <4 x float> @combine_vec_fcopysign_fneg_mag(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %x
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(copysign(x,z), y) -> copysign(x, y)
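; The inner copysign only rewrites the sign bit of %x, and the outer one
; overwrites that bit again with the sign of %y, so %z (xmm2) should drop out
; of the generated code completely.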
define <4 x float> @combine_vec_fcopysign_fcopysign_mag(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_mag:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm1
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_mag:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %1, <4 x float> %y)
  ret <4 x float> %2
}

; copysign(x, copysign(y,z)) -> copysign(x, z)
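; Only the sign bit of the sign operand is observable, and copysign(%y, %z)
; takes that bit from %z, so the combine can read the sign straight from %z
; (xmm2); %y (xmm1) never appears in the checked output.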
define <4 x float> @combine_vec_fcopysign_fcopysign_sgn(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; SSE-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm2
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fcopysign_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT:    vandps %xmm1, %xmm2, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %y, <4 x float> %z)
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

; copysign(x, fp_extend(y)) -> copysign(x, y)
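; fp_extend preserves the sign bit, so the sign could in principle be read
; from %y before conversion. The AVX lowering still extends %y with
; vcvtps2pd and masks in 256-bit registers; SSE has no 256-bit ops and
; scalarizes each lane with cvtss2sd, hence the long check sequence below.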
define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fpext_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm2, %xmm3
; SSE-NEXT:    cvtss2sd %xmm2, %xmm4
; SSE-NEXT:    movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT:    movaps %xmm2, %xmm6
; SSE-NEXT:    movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
; SSE-NEXT:    movaps {{.*#+}} xmm7
; SSE-NEXT:    movaps %xmm0, %xmm2
; SSE-NEXT:    andps %xmm7, %xmm2
; SSE-NEXT:    movaps {{.*#+}} xmm8 = [-0.000000e+00,-0.000000e+00]
; SSE-NEXT:    andps %xmm8, %xmm4
; SSE-NEXT:    orps %xmm4, %xmm2
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT:    andps %xmm7, %xmm0
; SSE-NEXT:    xorps %xmm4, %xmm4
; SSE-NEXT:    cvtss2sd %xmm5, %xmm4
; SSE-NEXT:    andps %xmm8, %xmm4
; SSE-NEXT:    orps %xmm0, %xmm4
; SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; SSE-NEXT:    andps %xmm7, %xmm0
; SSE-NEXT:    cvtss2sd %xmm3, %xmm3
; SSE-NEXT:    andps %xmm8, %xmm3
; SSE-NEXT:    orps %xmm0, %xmm3
; SSE-NEXT:    andps %xmm7, %xmm1
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    cvtss2sd %xmm6, %xmm0
; SSE-NEXT:    andps %xmm8, %xmm0
; SSE-NEXT:    orps %xmm0, %xmm1
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT:    movaps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fpext_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vcvtps2pd %xmm1, %ymm1
; AVX-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
; AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = fpext <4 x float> %y to <4 x double>
  %2 = call <4 x double> @llvm.copysign.v4f64(<4 x double> %x, <4 x double> %1)
  ret <4 x double> %2
}

; copysign(x, fp_round(y)) -> copysign(x, y)
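; fp_round likewise preserves the sign bit. AVX truncates %y with vcvtpd2ps
; and then applies the usual two-mask sequence (vzeroupper because the
; conversion touched a ymm register); SSE again scalarizes lane by lane with
; cvtsd2ss.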
define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x double> %y) {
; SSE-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3
; SSE-NEXT:    movaps {{.*#+}} xmm5
; SSE-NEXT:    andps %xmm5, %xmm0
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm6
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    orps %xmm6, %xmm0
; SSE-NEXT:    movshdup {{.*#+}} xmm6 = xmm3[1,1,3,3]
; SSE-NEXT:    andps %xmm5, %xmm6
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT:    cvtsd2ss %xmm1, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    orps %xmm6, %xmm1
; SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT:    movaps %xmm3, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
; SSE-NEXT:    andps %xmm5, %xmm1
; SSE-NEXT:    xorps %xmm6, %xmm6
; SSE-NEXT:    cvtsd2ss %xmm2, %xmm6
; SSE-NEXT:    andps %xmm4, %xmm6
; SSE-NEXT:    orps %xmm1, %xmm6
; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],xmm6[0],xmm0[3]
; SSE-NEXT:    shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE-NEXT:    andps %xmm5, %xmm3
; SSE-NEXT:    movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    cvtsd2ss %xmm2, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    orps %xmm3, %xmm1
; SSE-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_fcopysign_fptrunc_sgn:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandpd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vcvtpd2ps %ymm1, %xmm1
; AVX-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
; AVX-NEXT:    vandpd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vorpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = fptrunc <4 x double> %y to <4 x float>
  %2 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
declare <4 x float> @llvm.copysign.v4f32(<4 x float> %Mag, <4 x float> %Sgn)
declare <4 x double> @llvm.copysign.v4f64(<4 x double> %Mag, <4 x double> %Sgn)