; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512

; PR32368: (bitcast-and) -> fmul x2 -> (bitcast-and) must keep both masks;
; the fmul-by-2 is lowered as addps and the logic ops stay in the FP domain.
define <4 x float> @PR32368_128(<4 x float>) {
; SSE-LABEL: PR32368_128:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    addps %xmm0, %xmm0
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR32368_128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vaddps %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR32368_128:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vaddps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX2-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR32368_128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vaddps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vbroadcastss {{.*}}(%rip), %xmm1
; AVX512-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %2 = bitcast <4 x float> %0 to <4 x i32>
  %3 = and <4 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292>
  %4 = bitcast <4 x i32> %3 to <4 x float>
  %5 = fmul <4 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %6 = bitcast <4 x float> %5 to <4 x i32>
  %7 = and <4 x i32> %6, <i32 291, i32 291, i32 291, i32 291>
  %8 = bitcast <4 x i32> %7 to <4 x float>
  ret <4 x float> %8
}

define <8 x float> @PR32368_256(<8 x float>) {
; SSE-LABEL: PR32368_256:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967004,4294967004,4294967004,4294967004]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    addps %xmm1, %xmm1
; SSE-NEXT:    addps %xmm0, %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [291,291,291,291]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR32368_256:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR32368_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX2-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR32368_256:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX512-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
; AVX512-NEXT:    vandps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
  %2 = bitcast <8 x float> %0 to <8 x i32>
  %3 = and <8 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292>
  %4 = bitcast <8 x i32> %3 to <8 x float>
  %5 = fmul <8 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %6 = bitcast <8 x float> %5 to <8 x i32>
  %7 = and <8 x i32> %6, <i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291>
  %8 = bitcast <8 x i32> %7 to <8 x float>
  ret <8 x float> %8
}

define <16 x float> @PR32368_512(<16 x float>) {
; SSE-LABEL: PR32368_512:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [4294967004,4294967004,4294967004,4294967004]
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm2
; SSE-NEXT:    andps %xmm4, %xmm3
; SSE-NEXT:    addps %xmm3, %xmm3
; SSE-NEXT:    addps %xmm2, %xmm2
; SSE-NEXT:    addps %xmm1, %xmm1
; SSE-NEXT:    addps %xmm0, %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm4 = [291,291,291,291]
; SSE-NEXT:    andps %xmm4, %xmm0
; SSE-NEXT:    andps %xmm4, %xmm1
; SSE-NEXT:    andps %xmm4, %xmm2
; SSE-NEXT:    andps %xmm4, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR32368_512:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004,4294967004]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vaddps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [291,291,291,291,291,291,291,291]
; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR32368_512:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    vaddps %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vaddps %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
; AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR32368_512:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-NEXT:    vaddps %zmm0, %zmm0, %zmm0
; AVX512-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %2 = bitcast <16 x float> %0 to <16 x i32>
  %3 = and <16 x i32> %2, <i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292, i32 -292>
  %4 = bitcast <16 x i32> %3 to <16 x float>
  %5 = fmul <16 x float> %4, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %6 = bitcast <16 x float> %5 to <16 x i32>
  %7 = and <16 x i32> %6, <i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291, i32 291>
  %8 = bitcast <16 x i32> %7 to <16 x float>
  ret <16 x float> %8
}