; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
      3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
      4 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ
      8 
      9 ; FIXME: Drop the regex pattern matching of 'nan' once we drop support for MSVC
     10 ; 2013.
     11 
; fabs of <2 x double>: on every subtarget this lowers to a single vandps
; against a constant-pool sign-bit mask (clearing the sign bit of each lane);
; the test pins that no libcall or scalarization is emitted.
     12 define <2 x double> @fabs_v2f64(<2 x double> %p) {
     13 ; X32-LABEL: fabs_v2f64:
     14 ; X32:       # %bb.0:
     15 ; X32-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
     16 ; X32-NEXT:    retl
     17 ;
     18 ; X64-LABEL: fabs_v2f64:
     19 ; X64:       # %bb.0:
     20 ; X64-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
     21 ; X64-NEXT:    retq
     22   %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
     23   ret <2 x double> %t
     24 }
     25 declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
     26 
; fabs of <4 x float>: AVX uses vandps with a full 128-bit constant;
; AVX512VL uses an integer vpandd with a {1to4} broadcast of the mask;
; AVX512DQ+VL restores the FP-domain vandps form, still with a broadcast.
     27 define <4 x float> @fabs_v4f32(<4 x float> %p) {
     28 ; X32_AVX-LABEL: fabs_v4f32:
     29 ; X32_AVX:       # %bb.0:
     30 ; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
     31 ; X32_AVX-NEXT:    retl
     32 ;
     33 ; X32_AVX512VL-LABEL: fabs_v4f32:
     34 ; X32_AVX512VL:       # %bb.0:
     35 ; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
     36 ; X32_AVX512VL-NEXT:    retl
     37 ;
     38 ; X32_AVX512VLDQ-LABEL: fabs_v4f32:
     39 ; X32_AVX512VLDQ:       # %bb.0:
     40 ; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
     41 ; X32_AVX512VLDQ-NEXT:    retl
     42 ;
     43 ; X64_AVX-LABEL: fabs_v4f32:
     44 ; X64_AVX:       # %bb.0:
     45 ; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
     46 ; X64_AVX-NEXT:    retq
     47 ;
     48 ; X64_AVX512VL-LABEL: fabs_v4f32:
     49 ; X64_AVX512VL:       # %bb.0:
     50 ; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
     51 ; X64_AVX512VL-NEXT:    retq
     52 ;
     53 ; X64_AVX512VLDQ-LABEL: fabs_v4f32:
     54 ; X64_AVX512VLDQ:       # %bb.0:
     55 ; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
     56 ; X64_AVX512VLDQ-NEXT:    retq
     57   %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
     58   ret <4 x float> %t
     59 }
     60 declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
     61 
; fabs of <4 x double> (256-bit): AVX uses vandps on ymm with a full-width
; constant; AVX512VL uses the quadword integer form vpandq with a {1to4}
; broadcast; AVX512DQ+VL selects the FP-domain vandpd broadcast form.
     62 define <4 x double> @fabs_v4f64(<4 x double> %p) {
     63 ; X32_AVX-LABEL: fabs_v4f64:
     64 ; X32_AVX:       # %bb.0:
     65 ; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
     66 ; X32_AVX-NEXT:    retl
     67 ;
     68 ; X32_AVX512VL-LABEL: fabs_v4f64:
     69 ; X32_AVX512VL:       # %bb.0:
     70 ; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
     71 ; X32_AVX512VL-NEXT:    retl
     72 ;
     73 ; X32_AVX512VLDQ-LABEL: fabs_v4f64:
     74 ; X32_AVX512VLDQ:       # %bb.0:
     75 ; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
     76 ; X32_AVX512VLDQ-NEXT:    retl
     77 ;
     78 ; X64_AVX-LABEL: fabs_v4f64:
     79 ; X64_AVX:       # %bb.0:
     80 ; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
     81 ; X64_AVX-NEXT:    retq
     82 ;
     83 ; X64_AVX512VL-LABEL: fabs_v4f64:
     84 ; X64_AVX512VL:       # %bb.0:
     85 ; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
     86 ; X64_AVX512VL-NEXT:    retq
     87 ;
     88 ; X64_AVX512VLDQ-LABEL: fabs_v4f64:
     89 ; X64_AVX512VLDQ:       # %bb.0:
     90 ; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
     91 ; X64_AVX512VLDQ-NEXT:    retq
     92   %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
     93   ret <4 x double> %t
     94 }
     95 declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
     96 
; fabs of <8 x float> (256-bit): same pattern as v4f32 widened to ymm —
; AVX uses a full constant with vandps, AVX512VL uses vpandd {1to8},
; AVX512DQ+VL uses vandps {1to8}.
     97 define <8 x float> @fabs_v8f32(<8 x float> %p) {
     98 ; X32_AVX-LABEL: fabs_v8f32:
     99 ; X32_AVX:       # %bb.0:
    100 ; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
    101 ; X32_AVX-NEXT:    retl
    102 ;
    103 ; X32_AVX512VL-LABEL: fabs_v8f32:
    104 ; X32_AVX512VL:       # %bb.0:
    105 ; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
    106 ; X32_AVX512VL-NEXT:    retl
    107 ;
    108 ; X32_AVX512VLDQ-LABEL: fabs_v8f32:
    109 ; X32_AVX512VLDQ:       # %bb.0:
    110 ; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
    111 ; X32_AVX512VLDQ-NEXT:    retl
    112 ;
    113 ; X64_AVX-LABEL: fabs_v8f32:
    114 ; X64_AVX:       # %bb.0:
    115 ; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
    116 ; X64_AVX-NEXT:    retq
    117 ;
    118 ; X64_AVX512VL-LABEL: fabs_v8f32:
    119 ; X64_AVX512VL:       # %bb.0:
    120 ; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
    121 ; X64_AVX512VL-NEXT:    retq
    122 ;
    123 ; X64_AVX512VLDQ-LABEL: fabs_v8f32:
    124 ; X64_AVX512VLDQ:       # %bb.0:
    125 ; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
    126 ; X64_AVX512VLDQ-NEXT:    retq
    127   %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
    128   ret <8 x float> %t
    129 }
    130 declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
    131 
; fabs of <8 x double> (512-bit): plain AVX has no zmm registers, so the op is
; split into two 256-bit vandps that share one materialized mask register
; (ymm2). With AVX512VL the whole vector is handled by a single zmm vpandq
; (vandpd under DQ) with a {1to8} broadcast. The (nan|1.#QNAN0e+00) regex in
; the constant matches both libc and MSVC 2013 NaN printing — see the FIXME
; at the top of the file.
    132 define <8 x double> @fabs_v8f64(<8 x double> %p) {
    133 ; X32_AVX-LABEL: fabs_v8f64:
    134 ; X32_AVX:       # %bb.0:
    135 ; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
    136 ; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
    137 ; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
    138 ; X32_AVX-NEXT:    retl
    139 ;
    140 ; X32_AVX512VL-LABEL: fabs_v8f64:
    141 ; X32_AVX512VL:       # %bb.0:
    142 ; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
    143 ; X32_AVX512VL-NEXT:    retl
    144 ;
    145 ; X32_AVX512VLDQ-LABEL: fabs_v8f64:
    146 ; X32_AVX512VLDQ:       # %bb.0:
    147 ; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
    148 ; X32_AVX512VLDQ-NEXT:    retl
    149 ;
    150 ; X64_AVX-LABEL: fabs_v8f64:
    151 ; X64_AVX:       # %bb.0:
    152 ; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
    153 ; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
    154 ; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
    155 ; X64_AVX-NEXT:    retq
    156 ;
    157 ; X64_AVX512VL-LABEL: fabs_v8f64:
    158 ; X64_AVX512VL:       # %bb.0:
    159 ; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
    160 ; X64_AVX512VL-NEXT:    retq
    161 ;
    162 ; X64_AVX512VLDQ-LABEL: fabs_v8f64:
    163 ; X64_AVX512VLDQ:       # %bb.0:
    164 ; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
    165 ; X64_AVX512VLDQ-NEXT:    retq
    166   %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
    167   ret <8 x double> %t
    168 }
    169 declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
    170 
; fabs of <16 x float> (512-bit): like v8f64 above, plain AVX splits into two
; 256-bit vandps reusing one 8-lane mask in ymm2, while AVX512VL collapses it
; to a single zmm vpandd (vandps under DQ) with a {1to16} broadcast mask.
    171 define <16 x float> @fabs_v16f32(<16 x float> %p) {
    172 ; X32_AVX-LABEL: fabs_v16f32:
    173 ; X32_AVX:       # %bb.0:
    174 ; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
    175 ; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
    176 ; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
    177 ; X32_AVX-NEXT:    retl
    178 ;
    179 ; X32_AVX512VL-LABEL: fabs_v16f32:
    180 ; X32_AVX512VL:       # %bb.0:
    181 ; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
    182 ; X32_AVX512VL-NEXT:    retl
    183 ;
    184 ; X32_AVX512VLDQ-LABEL: fabs_v16f32:
    185 ; X32_AVX512VLDQ:       # %bb.0:
    186 ; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
    187 ; X32_AVX512VLDQ-NEXT:    retl
    188 ;
    189 ; X64_AVX-LABEL: fabs_v16f32:
    190 ; X64_AVX:       # %bb.0:
    191 ; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
    192 ; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
    193 ; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
    194 ; X64_AVX-NEXT:    retq
    195 ;
    196 ; X64_AVX512VL-LABEL: fabs_v16f32:
    197 ; X64_AVX512VL:       # %bb.0:
    198 ; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
    199 ; X64_AVX512VL-NEXT:    retq
    200 ;
    201 ; X64_AVX512VLDQ-LABEL: fabs_v16f32:
    202 ; X64_AVX512VLDQ:       # %bb.0:
    203 ; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
    204 ; X64_AVX512VLDQ-NEXT:    retq
    205   %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
    206   ret <16 x float> %t
    207 }
    208 declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
    209 
    210 ; PR20354: when generating code for a vector fabs op,
    211 ; make sure that we're only turning off the sign bit of each float value.
    212 ; No constant pool loads or vector ops are needed for the fabs of a
    213 ; bitcasted integer constant; we should just return an integer constant
    214 ; that has the sign bits turned off.
    215 ;
    216 ; So instead of something like this:
    217 ;    movabsq (constant pool load of mask for sign bits)
    218 ;    vmovq   (move from integer register to vector/fp register)
    219 ;    vandps  (mask off sign bits)
    220 ;    vmovq   (move vector/fp register back to integer return register)
    221 ;
    222 ; We should generate:
    223 ;    mov     (put constant value in return register)
    224 
; PR20354 regression test (see comment block above): fabs of a bitcasted
; constant must be folded at compile time to an integer constant with the
; per-lane sign bits cleared — no constant-pool load, no vector ops.
; Input 0xFFFFFFFF_00000000 -> result 0x7FFFFFFF_00000000. On i686 the i64
; return is split across edx:eax (high half 0x7FFFFFFF, low half 0).
    225 define i64 @fabs_v2f32_1() {
    226 ; X32-LABEL: fabs_v2f32_1:
    227 ; X32:       # %bb.0:
    228 ; X32-NEXT:    xorl %eax, %eax
    229 ; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
    230 ; X32-NEXT:    retl
    231 ;
    232 ; X64-LABEL: fabs_v2f32_1:
    233 ; X64:       # %bb.0:
    234 ; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
    235 ; X64-NEXT:    retq
    236  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
    237  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
    238  %ret = bitcast <2 x float> %fabs to i64
    239  ret i64 %ret
    240 }
    241 
; Companion to fabs_v2f32_1 with the sign bit in the LOW lane: input
; 0x00000000_FFFFFFFF folds to 0x00000000_7FFFFFFF. On i686 that is
; eax = 0x7FFFFFFF, edx = 0; on x86-64 a single 32-bit movl suffices
; (writing eax implicitly zeroes the upper half of rax).
    242 define i64 @fabs_v2f32_2() {
    243 ; X32-LABEL: fabs_v2f32_2:
    244 ; X32:       # %bb.0:
    245 ; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
    246 ; X32-NEXT:    xorl %edx, %edx
    247 ; X32-NEXT:    retl
    248 ;
    249 ; X64-LABEL: fabs_v2f32_2:
    250 ; X64:       # %bb.0:
    251 ; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
    252 ; X64-NEXT:    retq
    253  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
    254  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
    255  %ret = bitcast <2 x float> %fabs to i64
    256  ret i64 %ret
    257 }
    258 
    259 declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
    260