1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX 3 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX2 4 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL32 -check-prefix=NO-AVX512BW -check-prefix=AVX512 5 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL32 -check-prefix=AVX512 -check-prefix=AVX512BW 6 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX-64 7 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s -check-prefix=ALL64 -check-prefix=NO-AVX512BW-64 -check-prefix=AVX2-64 8 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s -check-prefix=ALL64 -check-prefix=NO-AVX512BW-64 -check-prefix=AVX512F-64 9 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s -check-prefix=ALL64 -check-prefix=AVX512F-64 -check-prefix=AVX512BW-64 10 11 ;===-----------------------------------------------------------------------------=== 12 ; This test checks the ability to recognize a cross-element pattern of 13 ; constants and perform the load via broadcasting a smaller constant 14 ; vector. 
15 ; For example: 16 ; <i32 0, i32 1, i32 0, i32 1> => broadcast of the constant vector <i32 0, i32 1> 17 ;===-----------------------------------------------------------------------------=== 18 19 define <16 x i8> @f16xi8_i16(<16 x i8> %a) { 20 ; AVX-LABEL: f16xi8_i16: 21 ; AVX: # %bb.0: 22 ; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 23 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 24 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 25 ; AVX-NEXT: retl 26 ; 27 ; ALL32-LABEL: f16xi8_i16: 28 ; ALL32: # %bb.0: 29 ; ALL32-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256] 30 ; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 31 ; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 32 ; ALL32-NEXT: retl 33 ; 34 ; AVX-64-LABEL: f16xi8_i16: 35 ; AVX-64: # %bb.0: 36 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm1 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 37 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 38 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 39 ; AVX-64-NEXT: retq 40 ; 41 ; ALL64-LABEL: f16xi8_i16: 42 ; ALL64: # %bb.0: 43 ; ALL64-NEXT: vpbroadcastw {{.*#+}} xmm1 = [256,256,256,256,256,256,256,256] 44 ; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 45 ; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 46 ; ALL64-NEXT: retq 47 %res1 = add <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 48 %res2 = and <16 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 49 ret <16 x i8> %res2 50 } 51 52 53 define <16 x i8> @f16xi8_i32(<16 x i8> %a) { 54 ; AVX-LABEL: f16xi8_i32: 55 ; AVX: # %bb.0: 56 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 57 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 58 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 59 ; AVX-NEXT: retl 60 ; 61 ; ALL32-LABEL: f16xi8_i32: 62 ; ALL32: # %bb.0: 63 ; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976] 64 ; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 65 ; ALL32-NEXT: 
vpand %xmm1, %xmm0, %xmm0 66 ; ALL32-NEXT: retl 67 ; 68 ; AVX-64-LABEL: f16xi8_i32: 69 ; AVX-64: # %bb.0: 70 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 71 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 72 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 73 ; AVX-64-NEXT: retq 74 ; 75 ; ALL64-LABEL: f16xi8_i32: 76 ; ALL64: # %bb.0: 77 ; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [50462976,50462976,50462976,50462976] 78 ; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 79 ; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 80 ; ALL64-NEXT: retq 81 %res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 82 %res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 83 ret <16 x i8> %res2 84 } 85 86 87 define <16 x i8> @f16xi8_i64(<16 x i8> %a) { 88 ; AVX-LABEL: f16xi8_i64: 89 ; AVX: # %bb.0: 90 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 91 ; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 92 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 93 ; AVX-NEXT: retl 94 ; 95 ; ALL32-LABEL: f16xi8_i64: 96 ; ALL32: # %bb.0: 97 ; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 98 ; ALL32-NEXT: vpaddb %xmm1, %xmm0, %xmm0 99 ; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 100 ; ALL32-NEXT: retl 101 ; 102 ; AVX-64-LABEL: f16xi8_i64: 103 ; AVX-64: # %bb.0: 104 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 105 ; AVX-64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 106 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 107 ; AVX-64-NEXT: retq 108 ; 109 ; ALL64-LABEL: f16xi8_i64: 110 ; ALL64: # %bb.0: 111 ; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [506097522914230528,506097522914230528] 112 ; ALL64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 113 ; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 114 ; ALL64-NEXT: retq 115 %res1 = add <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 116 %res2 = and <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, 
i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 117 ret <16 x i8> %res2 118 } 119 120 121 define <32 x i8> @f32xi8_i16(<32 x i8> %a) { 122 ; AVX-LABEL: f32xi8_i16: 123 ; AVX: # %bb.0: 124 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 125 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 126 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 127 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 128 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 129 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 130 ; AVX-NEXT: retl 131 ; 132 ; ALL32-LABEL: f32xi8_i16: 133 ; ALL32: # %bb.0: 134 ; ALL32-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 135 ; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 136 ; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 137 ; ALL32-NEXT: retl 138 ; 139 ; AVX-64-LABEL: f32xi8_i16: 140 ; AVX-64: # %bb.0: 141 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 142 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 143 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 144 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 145 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 146 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 147 ; AVX-64-NEXT: retq 148 ; 149 ; ALL64-LABEL: f32xi8_i16: 150 ; ALL64: # %bb.0: 151 ; ALL64-NEXT: vpbroadcastw {{.*#+}} ymm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 152 ; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 153 ; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 154 ; ALL64-NEXT: retq 155 %res1 = add <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 156 %res2 = and <32 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 157 ret <32 x i8> %res2 158 } 159 
160 161 define <32 x i8> @f32xi8_i32(<32 x i8> %a) { 162 ; AVX-LABEL: f32xi8_i32: 163 ; AVX: # %bb.0: 164 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 165 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 166 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 167 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 168 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 169 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 170 ; AVX-NEXT: retl 171 ; 172 ; ALL32-LABEL: f32xi8_i32: 173 ; ALL32: # %bb.0: 174 ; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 175 ; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 176 ; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 177 ; ALL32-NEXT: retl 178 ; 179 ; AVX-64-LABEL: f32xi8_i32: 180 ; AVX-64: # %bb.0: 181 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 182 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 183 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 184 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 185 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 186 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 187 ; AVX-64-NEXT: retq 188 ; 189 ; ALL64-LABEL: f32xi8_i32: 190 ; ALL64: # %bb.0: 191 ; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 192 ; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 193 ; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 194 ; ALL64-NEXT: retq 195 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 196 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 197 ret <32 x i8> %res2 198 } 199 200 201 define <32 x i8> 
@f32xi8_i64(<32 x i8> %a) { 202 ; AVX-LABEL: f32xi8_i64: 203 ; AVX: # %bb.0: 204 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 205 ; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] 206 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 207 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 208 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 209 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 210 ; AVX-NEXT: retl 211 ; 212 ; ALL32-LABEL: f32xi8_i64: 213 ; ALL32: # %bb.0: 214 ; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275] 215 ; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 216 ; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 217 ; ALL32-NEXT: retl 218 ; 219 ; AVX-64-LABEL: f32xi8_i64: 220 ; AVX-64: # %bb.0: 221 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 222 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0] 223 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 224 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 225 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 226 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 227 ; AVX-64-NEXT: retq 228 ; 229 ; ALL64-LABEL: f32xi8_i64: 230 ; ALL64: # %bb.0: 231 ; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528] 232 ; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 233 ; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 234 ; ALL64-NEXT: retq 235 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 236 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 237 ret <32 x i8> %res2 238 } 239 240 241 define <32 x i8> @f32xi8_i128(<32 x i8> %a) { 242 ; AVX-LABEL: f32xi8_i128: 243 ; AVX: # %bb.0: 244 ; AVX-NEXT: vextractf128 
$1, %ymm0, %xmm1 245 ; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 246 ; AVX-NEXT: vpaddb %xmm2, %xmm1, %xmm1 247 ; AVX-NEXT: vpaddb %xmm2, %xmm0, %xmm0 248 ; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 249 ; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0 250 ; AVX-NEXT: retl 251 ; 252 ; ALL32-LABEL: f32xi8_i128: 253 ; ALL32: # %bb.0: 254 ; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 255 ; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 256 ; ALL32-NEXT: vpaddb %ymm1, %ymm0, %ymm0 257 ; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0 258 ; ALL32-NEXT: retl 259 ; 260 ; AVX-64-LABEL: f32xi8_i128: 261 ; AVX-64: # %bb.0: 262 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1 263 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 264 ; AVX-64-NEXT: vpaddb %xmm2, %xmm1, %xmm1 265 ; AVX-64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 266 ; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 267 ; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 268 ; AVX-64-NEXT: retq 269 ; 270 ; ALL64-LABEL: f32xi8_i128: 271 ; ALL64: # %bb.0: 272 ; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 273 ; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 274 ; ALL64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 275 ; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0 276 ; ALL64-NEXT: retq 277 %res1 = add <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a 278 %res2 = and <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1 279 ret <32 x i8> %res2 280 } 281 282 283 define <64 x i8> @f64xi8_i16(<64 x i8> %a) { 284 ; AVX-LABEL: f64xi8_i16: 285 ; 
AVX: # %bb.0: 286 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 287 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 288 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 289 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1 290 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 291 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 292 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 293 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0 294 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 295 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 296 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 297 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 298 ; AVX-NEXT: retl 299 ; 300 ; NO-AVX512BW-LABEL: f64xi8_i16: 301 ; NO-AVX512BW: # %bb.0: 302 ; NO-AVX512BW-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 303 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 304 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 305 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0 306 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 307 ; NO-AVX512BW-NEXT: retl 308 ; 309 ; AVX512BW-LABEL: f64xi8_i16: 310 ; AVX512BW: # %bb.0: 311 ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 312 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 313 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 314 ; AVX512BW-NEXT: retl 315 ; 316 ; AVX-64-LABEL: f64xi8_i16: 317 ; AVX-64: # %bb.0: 318 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 319 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 320 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 321 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1 322 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 323 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 324 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 325 ; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 326 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 327 ; 
AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] 328 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 329 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 330 ; AVX-64-NEXT: retq 331 ; 332 ; NO-AVX512BW-64-LABEL: f64xi8_i16: 333 ; NO-AVX512BW-64: # %bb.0: 334 ; NO-AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} ymm2 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 335 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 336 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 337 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0 338 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1 339 ; NO-AVX512BW-64-NEXT: retq 340 ; 341 ; AVX512BW-64-LABEL: f64xi8_i16: 342 ; AVX512BW-64: # %bb.0: 343 ; AVX512BW-64-NEXT: vpbroadcastw {{.*#+}} zmm1 = [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,256] 344 ; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 345 ; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 346 ; AVX512BW-64-NEXT: retq 347 %res1 = add <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %a 348 %res2 = and <64 x i8> <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>, %res1 349 ret <64 x i8> %res2 350 } 351 352 353 define <64 x i8> @f64i8_i32(<64 x i8> %a) { 354 ; AVX-LABEL: f64i8_i32: 355 
; AVX: # %bb.0: 356 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 357 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 358 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 359 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1 360 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 361 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 362 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 363 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0 364 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 365 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 366 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 367 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 368 ; AVX-NEXT: retl 369 ; 370 ; NO-AVX512BW-LABEL: f64i8_i32: 371 ; NO-AVX512BW: # %bb.0: 372 ; NO-AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 373 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 374 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 375 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0 376 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 377 ; NO-AVX512BW-NEXT: retl 378 ; 379 ; AVX512BW-LABEL: f64i8_i32: 380 ; AVX512BW: # %bb.0: 381 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 382 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 383 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 384 ; AVX512BW-NEXT: retl 385 ; 386 ; AVX-64-LABEL: f64i8_i32: 387 ; AVX-64: # %bb.0: 388 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 389 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [3.82047143E-37,3.82047143E-37,3.82047143E-37,3.82047143E-37] 390 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 391 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1 392 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 393 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 394 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 395 ; AVX-64-NEXT: 
vpaddb %xmm3, %xmm0, %xmm0 396 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 397 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 398 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 399 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 400 ; AVX-64-NEXT: retq 401 ; 402 ; NO-AVX512BW-64-LABEL: f64i8_i32: 403 ; NO-AVX512BW-64: # %bb.0: 404 ; NO-AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 405 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 406 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 407 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0 408 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1 409 ; NO-AVX512BW-64-NEXT: retq 410 ; 411 ; AVX512BW-64-LABEL: f64i8_i32: 412 ; AVX512BW-64: # %bb.0: 413 ; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976,50462976] 414 ; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 415 ; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 416 ; AVX512BW-64-NEXT: retq 417 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %a 418 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3, i8 0, i8 1, i8 2, i8 3>, %res1 419 ret 
<64 x i8> %res2 420 } 421 422 423 define <64 x i8> @f64xi8_i64(<64 x i8> %a) { 424 ; AVX-LABEL: f64xi8_i64: 425 ; AVX: # %bb.0: 426 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 427 ; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0] 428 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 429 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1 430 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 431 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 432 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 433 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0 434 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 435 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 436 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 437 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 438 ; AVX-NEXT: retl 439 ; 440 ; NO-AVX512BW-LABEL: f64xi8_i64: 441 ; NO-AVX512BW: # %bb.0: 442 ; NO-AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275] 443 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 444 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 445 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0 446 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 447 ; NO-AVX512BW-NEXT: retl 448 ; 449 ; AVX512BW-LABEL: f64xi8_i64: 450 ; AVX512BW: # %bb.0: 451 ; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275,7.9499288951273625E-275] 452 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 453 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 454 ; AVX512BW-NEXT: retl 455 ; 456 ; AVX-64-LABEL: f64xi8_i64: 457 ; AVX-64: # %bb.0: 458 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 459 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0] 460 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 461 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1 462 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 463 ; AVX-64-NEXT: vextractf128 $1, %ymm0, 
%xmm2 464 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 465 ; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 466 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 467 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 468 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 469 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 470 ; AVX-64-NEXT: retq 471 ; 472 ; NO-AVX512BW-64-LABEL: f64xi8_i64: 473 ; NO-AVX512BW-64: # %bb.0: 474 ; NO-AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528] 475 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 476 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 477 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0 478 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1 479 ; NO-AVX512BW-64-NEXT: retq 480 ; 481 ; AVX512BW-64-LABEL: f64xi8_i64: 482 ; AVX512BW-64: # %bb.0: 483 ; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528,506097522914230528] 484 ; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 485 ; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 486 ; AVX512BW-64-NEXT: retq 487 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %a 488 %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, 
i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %res1 489 ret <64 x i8> %res2 490 } 491 492 493 define <64 x i8> @f64xi8_i128(<64 x i8> %a) { 494 ; AVX-LABEL: f64xi8_i128: 495 ; AVX: # %bb.0: 496 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 497 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 498 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 499 ; AVX-NEXT: vpaddb %xmm3, %xmm1, %xmm1 500 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 501 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 502 ; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2 503 ; AVX-NEXT: vpaddb %xmm3, %xmm0, %xmm0 504 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 505 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 506 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 507 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 508 ; AVX-NEXT: retl 509 ; 510 ; NO-AVX512BW-LABEL: f64xi8_i128: 511 ; NO-AVX512BW: # %bb.0: 512 ; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 513 ; NO-AVX512BW-NEXT: # ymm2 = mem[0,1,0,1] 514 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 515 ; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 516 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0 517 ; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1 518 ; NO-AVX512BW-NEXT: retl 519 ; 520 ; AVX512BW-LABEL: f64xi8_i128: 521 ; AVX512BW: # %bb.0: 522 ; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 523 ; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 524 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 525 ; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 526 ; AVX512BW-NEXT: retl 527 ; 528 ; AVX-64-LABEL: f64xi8_i128: 529 ; AVX-64: # %bb.0: 530 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 531 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = 
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 532 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 533 ; AVX-64-NEXT: vpaddb %xmm3, %xmm1, %xmm1 534 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 535 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 536 ; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2 537 ; AVX-64-NEXT: vpaddb %xmm3, %xmm0, %xmm0 538 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 539 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 540 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 541 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 542 ; AVX-64-NEXT: retq 543 ; 544 ; NO-AVX512BW-64-LABEL: f64xi8_i128: 545 ; NO-AVX512BW-64: # %bb.0: 546 ; NO-AVX512BW-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 547 ; NO-AVX512BW-64-NEXT: # ymm2 = mem[0,1,0,1] 548 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1 549 ; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 550 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0 551 ; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1 552 ; NO-AVX512BW-64-NEXT: retq 553 ; 554 ; AVX512BW-64-LABEL: f64xi8_i128: 555 ; AVX512BW-64: # %bb.0: 556 ; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 557 ; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 558 ; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 559 ; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 560 ; AVX512BW-64-NEXT: retq 561 %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, 
i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %a
  %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, %res1
  ret <64 x i8> %res2
}


; NOTE(review): all "; PREFIX..." lines below are FileCheck assertions produced
; by utils/update_llc_test_checks.py; they must stay in sync with llc output and
; should be regenerated by the script rather than edited by hand.

; <64 x i8> constant repeating every 32 bytes: the repeating unit is a full
; 256-bit vector, so the widest useful load is a 256-bit broadcast
; (vbroadcasti64x4 on the AVX512BW targets).
define <64 x i8> @f64xi8_i256(<64 x i8> %a) {
; AVX-LABEL: f64xi8_i256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddb %xmm4, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; NO-AVX512BW-LABEL: f64xi8_i256:
; NO-AVX512BW: # %bb.0:
; NO-AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f64xi8_i256:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f64xi8_i256:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-64-NEXT: vpaddb %xmm4, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddb %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddb %xmm4, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; NO-AVX512BW-64-LABEL: f64xi8_i256:
; NO-AVX512BW-64: # %bb.0:
; NO-AVX512BW-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: vpaddb %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: retq
;
; AVX512BW-64-LABEL: f64xi8_i256:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31]
; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-64-NEXT: vpaddb %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
  %res1 = add <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %a
  %res2 = and <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, %res1
  ret <64 x i8> %res2
}


; <8 x i16> constant repeating every 2 elements (one i32 lane): splat of the
; i32 value 0x00010000 = 65536 via vpbroadcastd on AVX2/AVX512.
define <8 x i16> @f8xi16_i32(<8 x i16> %a) {
; AVX-LABEL: f8xi16_i32:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi16_i32:
; ALL32: # %bb.0:
; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi16_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi16_i32:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65536,65536,65536,65536]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
  %res1 = add <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
  %res2 = and <8 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
  ret <8 x i16> %res2
}


; <8 x i16> constant repeating every 4 elements (one i64 lane): splat of the
; i64 value 844433520132096 (= <0,1,2,3> packed as i16s).
define <8 x i16> @f8xi16_i64(<8 x i16> %a) {
; AVX-LABEL: f8xi16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi16_i64:
; ALL32: # %bb.0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi16_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi16_i64:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [844433520132096,844433520132096]
; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
  %res1 = add <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
  %res2 = and <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
  ret <8 x i16> %res2
}


; <16 x i16> constant repeating every 2 elements (one i32 lane), 256-bit wide.
define <16 x i16> @f16xi16_i32(<16 x i16> %a) {
; AVX-LABEL: f16xi16_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vbroadcastss {{.*#+}} xmm2 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f16xi16_i32:
; ALL32: # %bb.0:
; ALL32-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f16xi16_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm2 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f16xi16_i32:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65536,65536,65536,65536,65536,65536,65536,65536]
; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
  %res2 = and <16 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
  ret <16 x i16> %res2
}


; <16 x i16> constant repeating every 4 elements (one i64 lane), 256-bit wide.
define <16 x i16> @f16xi16_i64(<16 x i16> %a) {
; AVX-LABEL: f16xi16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f16xi16_i64:
; ALL32: # %bb.0:
; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f16xi16_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f16xi16_i64:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [844433520132096,844433520132096,844433520132096,844433520132096]
; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
  %res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
  ret <16 x i16> %res2
}


; <16 x i16> constant repeating every 8 elements (one 128-bit lane):
; vbroadcasti128 of the <0..7> subvector on AVX2/AVX512.
define <16 x i16> @f16xi16_i128(<16 x i16> %a) {
; AVX-LABEL: f16xi16_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f16xi16_i128:
; ALL32: # %bb.0:
; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
; ALL32-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f16xi16_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddw %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddw %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f16xi16_i128:
; ALL64: # %bb.0:
; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vpaddw %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a
  %res2 = and <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1
  ret <16 x i16> %res2
}


; <32 x i16> constant repeating every 2 elements (one i32 lane), 512-bit wide.
define <32 x i16> @f32xi16_i32(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; NO-AVX512BW-LABEL: f32xi16_i32:
; NO-AVX512BW: # %bb.0:
; NO-AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm3 = [9.18354962E-41,9.18354962E-41,9.18354962E-41,9.18354962E-41]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; NO-AVX512BW-64-LABEL: f32xi16_i32:
; NO-AVX512BW-64: # %bb.0:
; NO-AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65536,65536,65536,65536,65536,65536,65536,65536]
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: retq
;
; AVX512BW-64-LABEL: f32xi16_i32:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpbroadcastd {{.*#+}} zmm1 = [65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536,65536]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
  %res1 = add <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %a
  %res2 = and <32 x i16> <i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1, i16 0, i16 1>, %res1
  ret <32 x i16> %res2
}


; <32 x i16> constant repeating every 4 elements (one i64 lane), 512-bit wide.
define <32 x i16> @f32xi16_i64(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; NO-AVX512BW-LABEL: f32xi16_i64:
; NO-AVX512BW: # %bb.0:
; NO-AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309,4.1720559249406128E-309]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; NO-AVX512BW-64-LABEL: f32xi16_i64:
; NO-AVX512BW-64: # %bb.0:
; NO-AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [844433520132096,844433520132096,844433520132096,844433520132096]
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: retq
;
; AVX512BW-64-LABEL: f32xi16_i64:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096,844433520132096]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
  %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %a
  %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3, i16 0, i16 1, i16 2, i16 3>, %res1
  ret <32 x i16> %res2
}


; <32 x i16> constant repeating every 8 elements (one 128-bit lane), 512-bit
; wide: vbroadcasti32x4 of the <0..7> subvector on AVX512BW.
define <32 x i16> @f32xi16_i128(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; NO-AVX512BW-LABEL: f32xi16_i128:
; NO-AVX512BW: # %bb.0:
; NO-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; NO-AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i128:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; NO-AVX512BW-64-LABEL: f32xi16_i128:
; NO-AVX512BW-64: # %bb.0:
; NO-AVX512BW-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; NO-AVX512BW-64-NEXT: # ymm2 = mem[0,1,0,1]
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: retq
;
; AVX512BW-64-LABEL: f32xi16_i128:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
  %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %a
  %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, %res1
  ret <32 x i16> %res2
}


; <32 x i16> constant repeating every 16 elements (one 256-bit lane):
; vbroadcasti64x4 of the <0..15> subvector on AVX512BW.
define <32 x i16> @f32xi16_i256(<32 x i16> %a) {
; AVX-LABEL: f32xi16_i256:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
; AVX-NEXT: vpaddw %xmm4, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddw %xmm4, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; NO-AVX512BW-LABEL: f32xi16_i256:
; NO-AVX512BW: # %bb.0:
; NO-AVX512BW-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-NEXT: retl
;
; AVX512BW-LABEL: f32xi16_i256:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512BW-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: retl
;
; AVX-64-LABEL: f32xi16_i256:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [8,9,10,11,12,13,14,15]
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm4 = [0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddw %xmm4, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddw %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddw %xmm4, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; NO-AVX512BW-64-LABEL: f32xi16_i256:
; NO-AVX512BW-64: # %bb.0:
; NO-AVX512BW-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: vpaddw %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; NO-AVX512BW-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; NO-AVX512BW-64-NEXT: retq
;
; AVX512BW-64-LABEL: f32xi16_i256:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; AVX512BW-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512BW-64-NEXT: vpaddw %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
  %res1 = add <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %a
  %res2 = and <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, %res1
  ret <32 x i16> %res2
}



; <4 x i32> constant repeating every 2 elements (one i64 lane): splat of the
; i64 value 4294967296 (= <0,1> packed as i32s).
define <4 x i32> @f4xi32_i64(<4 x i32> %a) {
; AVX-LABEL: f4xi32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f4xi32_i64:
; ALL32: # %bb.0:
; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; ALL32-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f4xi32_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; AVX-64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f4xi32_i64:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4294967296,4294967296]
; ALL64-NEXT: vpaddd %xmm1, %xmm0, %xmm0
; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
; ALL64-NEXT: retq
  %res1 = add <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %a
  %res2 = and <4 x i32> <i32 0, i32 1, i32 0, i32 1>, %res1
  ret <4 x i32> %res2
}


; <8 x i32> constant repeating every 2 elements (one i64 lane), 256-bit wide.
define <8 x i32> @f8xi32_i64(<8 x i32> %a) {
; AVX-LABEL: f8xi32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi32_i64:
; ALL32: # %bb.0:
; ALL32-NEXT: vpbroadcastq {{.*#+}} ymm1 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi32_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi32_i64:
; ALL64: # %bb.0:
; ALL64-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967296,4294967296,4294967296,4294967296]
; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
  %res2 = and <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
  ret <8 x i32> %res2
}


; <8 x i32> constant repeating every 4 elements (one 128-bit lane):
; vbroadcasti128 of the <0,1,2,3> subvector on AVX2/AVX512.
define <8 x i32> @f8xi32_i128(<8 x i32> %a) {
; AVX-LABEL: f8xi32_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
; AVX-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f8xi32_i128:
; ALL32: # %bb.0:
; ALL32-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3]
; ALL32-NEXT: # ymm1 = mem[0,1,0,1]
; ALL32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f8xi32_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3]
; AVX-64-NEXT: vpaddd %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddd %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f8xi32_i128:
; ALL64: # %bb.0:
; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,2,3,0,1,2,3]
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
  %res2 = and <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
  ret <8 x i32> %res2
}


; <16 x i32> constant repeating every 2 elements (one i64 lane), 512-bit wide.
define <16 x i32> @f16xi32_i64(<16 x i32> %a) {
; AVX-LABEL: f16xi32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; AVX2-LABEL: f16xi32_i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xi32_i64:
; AVX512: # %bb.0:
; AVX512-NEXT: vpbroadcastq {{.*#+}} zmm1 = [2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314,2.1219957909652723E-314]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovddup {{.*#+}} xmm3 = mem[0,0]
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,0,1,0,1,0,1]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; AVX2-64-LABEL: f16xi32_i64:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4294967296,4294967296,4294967296,4294967296]
; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: retq
;
; AVX512F-64-LABEL: f16xi32_i64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296,4294967296]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
  %res1 = add <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %a
  %res2 = and <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>, %res1
  ret <16 x i32> %res2
}


; <16 x i32> constant repeating every 4 elements (one 128-bit lane), 512-bit
; wide: vbroadcasti32x4 of the <0,1,2,3> subvector on AVX512.
define <16 x i32> @f16xi32_i128(<16 x i32> %a) {
; AVX-LABEL: f16xi32_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; AVX2-LABEL: f16xi32_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
; AVX2-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retl
;
; AVX512-LABEL: f16xi32_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f16xi32_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [0,1,2,3]
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm1, %xmm1
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm2, %xmm2
; AVX-64-NEXT: vpaddd %xmm3, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-64-NEXT: retq
;
; AVX2-64-LABEL: f16xi32_i128:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,2,3,0,1,2,3]
; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1]
; AVX2-64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-64-NEXT: retq
;
; AVX512F-64-LABEL: f16xi32_i128:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512F-64-NEXT: retq
  %res1 = add <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %a
  %res2 = and <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>, %res1
  ret <16 x i32> %res2
}


; <4 x i64> constant repeating every 2 elements (one 128-bit lane):
; vbroadcasti128 of <0,1> on 64-bit AVX2/AVX512 targets.
define <4 x i64> @f4xi64_i128(<4 x i64> %a) {
; AVX-LABEL: f4xi64_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,0]
; AVX-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: vandps {{\.LCPI.*}}, %ymm0, %ymm0
; AVX-NEXT: retl
;
; ALL32-LABEL: f4xi64_i128:
; ALL32: # %bb.0:
; ALL32-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,1,0,0,0,1,0]
; ALL32-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; ALL32-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL32-NEXT: retl
;
; AVX-64-LABEL: f4xi64_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-64-NEXT: movl $1, %eax
; AVX-64-NEXT: vmovq %rax, %xmm2
; AVX-64-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7]
; AVX-64-NEXT: vpaddq %xmm2, %xmm1, %xmm1
; AVX-64-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-64-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; ALL64-LABEL: f4xi64_i128:
; ALL64: # %bb.0:
; ALL64-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,1,0,1]
; ALL64-NEXT: # ymm1 = mem[0,1,0,1]
; ALL64-NEXT: vpaddq %ymm1, %ymm0, %ymm0
; ALL64-NEXT: vpand %ymm1, %ymm0, %ymm0
; ALL64-NEXT: retq
  %res1 = add <4 x i64> <i64 0, i64 1, i64 0, i64 1>, %a
  %res2 = and <4 x i64> <i64 0, i64 1, i64 0, i64 1>, %res1
  ret <4 x i64> %res2
}


; <8 x i64> constant repeating every 2 elements (one 128-bit lane), 512-bit
; wide. NOTE(review): this function continues past the end of this chunk.
define <8 x i64> @f8xi64_i128(<8 x i64> %a) {
; AVX-LABEL: f8xi64_i128:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,1,0]
; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddq %xmm3, %xmm1, %xmm1
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2
; AVX-NEXT: vpaddq %xmm3, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; AVX-NEXT: retl
;
; AVX2-LABEL: f8xi64_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,0,0,1,0]
; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retl
;
; AVX512-LABEL: f8xi64_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0]
; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retl
;
; AVX-64-LABEL: f8xi64_i128:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX-64-NEXT: movl $1, %eax
; AVX-64-NEXT: vmovq %rax, %xmm3
; AVX-64-NEXT: vpslldq {{.*#+}} xmm3 =
zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7] 1462 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1463 ; AVX-64-NEXT: vpaddq %xmm3, %xmm1, %xmm1 1464 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1465 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1466 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1467 ; AVX-64-NEXT: vpaddq %xmm3, %xmm0, %xmm0 1468 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1469 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [0,1,0,1] 1470 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1471 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1472 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1473 ; AVX-64-NEXT: retq 1474 ; 1475 ; AVX2-64-LABEL: f8xi64_i128: 1476 ; AVX2-64: # %bb.0: 1477 ; AVX2-64-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [0,1,0,1] 1478 ; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1479 ; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1480 ; AVX2-64-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1481 ; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1482 ; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1483 ; AVX2-64-NEXT: retq 1484 ; 1485 ; AVX512F-64-LABEL: f8xi64_i128: 1486 ; AVX512F-64: # %bb.0: 1487 ; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,0,1,0,1,0,1] 1488 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1489 ; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1490 ; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1491 ; AVX512F-64-NEXT: retq 1492 %res1 = add <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %a 1493 %res2 = and <8 x i64> <i64 0, i64 1, i64 0, i64 1, i64 0, i64 1, i64 0, i64 1>, %res1 1494 ret <8 x i64> %res2 1495 } 1496 1497 1498 define <8 x i64> @f8xi64_i256(<8 x i64> %a) { 1499 ; AVX-LABEL: f8xi64_i256: 1500 ; AVX: # %bb.0: 1501 ; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2 1502 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [2,0,3,0] 1503 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1504 ; AVX-NEXT: vmovdqa {{.*#+}} xmm4 = [0,0,1,0] 1505 ; AVX-NEXT: vpaddq %xmm4, %xmm1, %xmm1 1506 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1507 ; 
AVX-NEXT: vextractf128 $1, %ymm0, %xmm2 1508 ; AVX-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1509 ; AVX-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1510 ; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1511 ; AVX-NEXT: vmovaps {{.*#+}} ymm2 = [0,0,1,0,2,0,3,0] 1512 ; AVX-NEXT: vandps %ymm2, %ymm0, %ymm0 1513 ; AVX-NEXT: vandps %ymm2, %ymm1, %ymm1 1514 ; AVX-NEXT: retl 1515 ; 1516 ; AVX2-LABEL: f8xi64_i256: 1517 ; AVX2: # %bb.0: 1518 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,0,1,0,2,0,3,0] 1519 ; AVX2-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1520 ; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0 1521 ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 1522 ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 1523 ; AVX2-NEXT: retl 1524 ; 1525 ; AVX512-LABEL: f8xi64_i256: 1526 ; AVX512: # %bb.0: 1527 ; AVX512-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,0,0,1,0,2,0,3,0] 1528 ; AVX512-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1529 ; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 1530 ; AVX512-NEXT: retl 1531 ; 1532 ; AVX-64-LABEL: f8xi64_i256: 1533 ; AVX-64: # %bb.0: 1534 ; AVX-64-NEXT: vextractf128 $1, %ymm1, %xmm2 1535 ; AVX-64-NEXT: vmovdqa {{.*#+}} xmm3 = [2,3] 1536 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1537 ; AVX-64-NEXT: movl $1, %eax 1538 ; AVX-64-NEXT: vmovq %rax, %xmm4 1539 ; AVX-64-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4,5,6,7] 1540 ; AVX-64-NEXT: vpaddq %xmm4, %xmm1, %xmm1 1541 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1542 ; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2 1543 ; AVX-64-NEXT: vpaddq %xmm3, %xmm2, %xmm2 1544 ; AVX-64-NEXT: vpaddq %xmm4, %xmm0, %xmm0 1545 ; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1546 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [0,1,2,3] 1547 ; AVX-64-NEXT: vandps %ymm2, %ymm0, %ymm0 1548 ; AVX-64-NEXT: vandps %ymm2, %ymm1, %ymm1 1549 ; AVX-64-NEXT: retq 1550 ; 1551 ; AVX2-64-LABEL: f8xi64_i256: 1552 ; AVX2-64: # %bb.0: 1553 ; AVX2-64-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3] 1554 ; AVX2-64-NEXT: vpaddq %ymm2, %ymm1, %ymm1 1555 ; AVX2-64-NEXT: vpaddq 
%ymm2, %ymm0, %ymm0 1556 ; AVX2-64-NEXT: vpand %ymm2, %ymm0, %ymm0 1557 ; AVX2-64-NEXT: vpand %ymm2, %ymm1, %ymm1 1558 ; AVX2-64-NEXT: retq 1559 ; 1560 ; AVX512F-64-LABEL: f8xi64_i256: 1561 ; AVX512F-64: # %bb.0: 1562 ; AVX512F-64-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3] 1563 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1564 ; AVX512F-64-NEXT: vpaddq %zmm1, %zmm0, %zmm0 1565 ; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 1566 ; AVX512F-64-NEXT: retq 1567 %res1 = add <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %a 1568 %res2 = and <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 0, i64 1, i64 2, i64 3>, %res1 1569 ret <8 x i64> %res2 1570 } 1571 1572 1573 define <4 x float> @f4xf32_f64(<4 x float> %a) { 1574 ; AVX-LABEL: f4xf32_f64: 1575 ; AVX: # %bb.0: 1576 ; AVX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1577 ; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0 1578 ; AVX-NEXT: vdivps %xmm0, %xmm1, %xmm0 1579 ; AVX-NEXT: retl 1580 ; 1581 ; ALL32-LABEL: f4xf32_f64: 1582 ; ALL32: # %bb.0: 1583 ; ALL32-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1584 ; ALL32-NEXT: vaddps %xmm1, %xmm0, %xmm0 1585 ; ALL32-NEXT: vdivps %xmm0, %xmm1, %xmm0 1586 ; ALL32-NEXT: retl 1587 ; 1588 ; AVX-64-LABEL: f4xf32_f64: 1589 ; AVX-64: # %bb.0: 1590 ; AVX-64-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1591 ; AVX-64-NEXT: vaddps %xmm1, %xmm0, %xmm0 1592 ; AVX-64-NEXT: vdivps %xmm0, %xmm1, %xmm0 1593 ; AVX-64-NEXT: retq 1594 ; 1595 ; ALL64-LABEL: f4xf32_f64: 1596 ; ALL64: # %bb.0: 1597 ; ALL64-NEXT: vpbroadcastq {{.*#+}} xmm1 = [4575657222482165760,4575657222482165760] 1598 ; ALL64-NEXT: vaddps %xmm1, %xmm0, %xmm0 1599 ; ALL64-NEXT: vdivps %xmm0, %xmm1, %xmm0 1600 ; ALL64-NEXT: retq 1601 %res1 = fadd <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %a 1602 %res2 = fdiv <4 x float> <float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1603 ret <4 x float> %res2 1604 } 1605 1606 1607 define <8 x float> @f8xf32_f64(<8 x float> %a) { 1608 ; AVX-LABEL: f8xf32_f64: 1609 ; AVX: # 
%bb.0: 1610 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1611 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0 1612 ; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 1613 ; AVX-NEXT: retl 1614 ; 1615 ; ALL32-LABEL: f8xf32_f64: 1616 ; ALL32: # %bb.0: 1617 ; ALL32-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1618 ; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0 1619 ; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0 1620 ; ALL32-NEXT: retl 1621 ; 1622 ; AVX-64-LABEL: f8xf32_f64: 1623 ; AVX-64: # %bb.0: 1624 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1625 ; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1626 ; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1627 ; AVX-64-NEXT: retq 1628 ; 1629 ; ALL64-LABEL: f8xf32_f64: 1630 ; ALL64: # %bb.0: 1631 ; ALL64-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760] 1632 ; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1633 ; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1634 ; ALL64-NEXT: retq 1635 %res1 = fadd <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a 1636 %res2 = fdiv <8 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1637 ret <8 x float> %res2 1638 } 1639 1640 1641 define <8 x float> @f8xf32_f128(<8 x float> %a) { 1642 ; AVX-LABEL: f8xf32_f128: 1643 ; AVX: # %bb.0: 1644 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1645 ; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1646 ; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0 1647 ; AVX-NEXT: vdivps %ymm0, %ymm1, %ymm0 1648 ; AVX-NEXT: retl 1649 ; 1650 ; ALL32-LABEL: f8xf32_f128: 1651 ; ALL32: # %bb.0: 1652 ; ALL32-NEXT: vbroadcastf128 
{{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1653 ; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1654 ; ALL32-NEXT: vaddps %ymm1, %ymm0, %ymm0 1655 ; ALL32-NEXT: vdivps %ymm0, %ymm1, %ymm0 1656 ; ALL32-NEXT: retl 1657 ; 1658 ; AVX-64-LABEL: f8xf32_f128: 1659 ; AVX-64: # %bb.0: 1660 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1661 ; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1662 ; AVX-64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1663 ; AVX-64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1664 ; AVX-64-NEXT: retq 1665 ; 1666 ; ALL64-LABEL: f8xf32_f128: 1667 ; ALL64: # %bb.0: 1668 ; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1669 ; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1670 ; ALL64-NEXT: vaddps %ymm1, %ymm0, %ymm0 1671 ; ALL64-NEXT: vdivps %ymm0, %ymm1, %ymm0 1672 ; ALL64-NEXT: retq 1673 %res1 = fadd <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a 1674 %res2 = fdiv <8 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1 1675 ret <8 x float> %res2 1676 } 1677 1678 1679 define <16 x float> @f16xf32_f64(<16 x float> %a) { 1680 ; AVX-LABEL: f16xf32_f64: 1681 ; AVX: # %bb.0: 1682 ; AVX-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1683 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1684 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1685 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1686 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1687 ; AVX-NEXT: retl 1688 ; 1689 ; AVX2-LABEL: f16xf32_f64: 1690 ; AVX2: # %bb.0: 1691 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1692 ; AVX2-NEXT: vaddps 
%ymm2, %ymm1, %ymm1 1693 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1694 ; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1695 ; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1696 ; AVX2-NEXT: retl 1697 ; 1698 ; AVX512-LABEL: f16xf32_f64: 1699 ; AVX512: # %bb.0: 1700 ; AVX512-NEXT: vbroadcastsd {{.*#+}} zmm1 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1701 ; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1702 ; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1703 ; AVX512-NEXT: retl 1704 ; 1705 ; AVX-64-LABEL: f16xf32_f64: 1706 ; AVX-64: # %bb.0: 1707 ; AVX-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [0.0078125018626451492,0.0078125018626451492,0.0078125018626451492,0.0078125018626451492] 1708 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1709 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1710 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1711 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1712 ; AVX-64-NEXT: retq 1713 ; 1714 ; AVX2-64-LABEL: f16xf32_f64: 1715 ; AVX2-64: # %bb.0: 1716 ; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760] 1717 ; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1718 ; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1719 ; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1720 ; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1721 ; AVX2-64-NEXT: retq 1722 ; 1723 ; AVX512F-64-LABEL: f16xf32_f64: 1724 ; AVX512F-64: # %bb.0: 1725 ; AVX512F-64-NEXT: vbroadcastsd {{.*#+}} zmm1 = [4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760,4575657222482165760] 1726 ; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1727 ; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1728 ; AVX512F-64-NEXT: retq 1729 %res1 = fadd <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, 
float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %a 1730 %res2 = fdiv <16 x float> <float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0, float 2.0, float 1.0>, %res1 1731 ret <16 x float> %res2 1732 } 1733 1734 1735 define <16 x float> @f16xf32_f128(<16 x float> %a) { 1736 ; AVX-LABEL: f16xf32_f128: 1737 ; AVX: # %bb.0: 1738 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1739 ; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1740 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1741 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1742 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1743 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1744 ; AVX-NEXT: retl 1745 ; 1746 ; AVX2-LABEL: f16xf32_f128: 1747 ; AVX2: # %bb.0: 1748 ; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1749 ; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1750 ; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 1751 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1752 ; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1753 ; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1754 ; AVX2-NEXT: retl 1755 ; 1756 ; AVX512-LABEL: f16xf32_f128: 1757 ; AVX512: # %bb.0: 1758 ; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1759 ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1760 ; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1761 ; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1762 ; AVX512-NEXT: retl 1763 ; 1764 ; AVX-64-LABEL: f16xf32_f128: 1765 ; AVX-64: # %bb.0: 1766 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = 
[4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1767 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1768 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1769 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1770 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1771 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1772 ; AVX-64-NEXT: retq 1773 ; 1774 ; AVX2-64-LABEL: f16xf32_f128: 1775 ; AVX2-64: # %bb.0: 1776 ; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1777 ; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1778 ; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1779 ; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1780 ; AVX2-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1781 ; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1782 ; AVX2-64-NEXT: retq 1783 ; 1784 ; AVX512F-64-LABEL: f16xf32_f128: 1785 ; AVX512F-64: # %bb.0: 1786 ; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1787 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1788 ; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1789 ; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1790 ; AVX512F-64-NEXT: retq 1791 %res1 = fadd <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %a 1792 %res2 = fdiv <16 x float> <float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0, float 4.0, float 1.0, float 2.0, float 3.0>, %res1 1793 ret <16 x float> %res2 1794 } 1795 1796 1797 define <16 x float> @f16xf32_f256(<16 x float> %a) { 1798 ; AVX-LABEL: f16xf32_f256: 1799 ; AVX: # %bb.0: 1800 ; AVX-NEXT: vmovaps 
{{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1801 ; AVX-NEXT: vaddps %ymm2, %ymm1, %ymm1 1802 ; AVX-NEXT: vaddps %ymm2, %ymm0, %ymm0 1803 ; AVX-NEXT: vdivps %ymm0, %ymm2, %ymm0 1804 ; AVX-NEXT: vdivps %ymm1, %ymm2, %ymm1 1805 ; AVX-NEXT: retl 1806 ; 1807 ; AVX2-LABEL: f16xf32_f256: 1808 ; AVX2: # %bb.0: 1809 ; AVX2-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1810 ; AVX2-NEXT: vaddps %ymm2, %ymm1, %ymm1 1811 ; AVX2-NEXT: vaddps %ymm2, %ymm0, %ymm0 1812 ; AVX2-NEXT: vdivps %ymm0, %ymm2, %ymm0 1813 ; AVX2-NEXT: vdivps %ymm1, %ymm2, %ymm1 1814 ; AVX2-NEXT: retl 1815 ; 1816 ; AVX512-LABEL: f16xf32_f256: 1817 ; AVX512: # %bb.0: 1818 ; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1819 ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1820 ; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0 1821 ; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0 1822 ; AVX512-NEXT: retl 1823 ; 1824 ; AVX-64-LABEL: f16xf32_f256: 1825 ; AVX-64: # %bb.0: 1826 ; AVX-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1827 ; AVX-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1828 ; AVX-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1829 ; AVX-64-NEXT: vdivps %ymm0, %ymm2, %ymm0 1830 ; AVX-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1831 ; AVX-64-NEXT: retq 1832 ; 1833 ; AVX2-64-LABEL: f16xf32_f256: 1834 ; AVX2-64: # %bb.0: 1835 ; AVX2-64-NEXT: vmovaps {{.*#+}} ymm2 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1836 ; AVX2-64-NEXT: vaddps %ymm2, %ymm1, %ymm1 1837 ; AVX2-64-NEXT: vaddps %ymm2, %ymm0, %ymm0 1838 ; AVX2-64-NEXT: 
vdivps %ymm0, %ymm2, %ymm0 1839 ; AVX2-64-NEXT: vdivps %ymm1, %ymm2, %ymm1 1840 ; AVX2-64-NEXT: retq 1841 ; 1842 ; AVX512F-64-LABEL: f16xf32_f256: 1843 ; AVX512F-64: # %bb.0: 1844 ; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00,8.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,5.000000e+00,6.000000e+00,7.000000e+00] 1845 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1846 ; AVX512F-64-NEXT: vaddps %zmm1, %zmm0, %zmm0 1847 ; AVX512F-64-NEXT: vdivps %zmm0, %zmm1, %zmm0 1848 ; AVX512F-64-NEXT: retq 1849 %res1 = fadd <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %a 1850 %res2 = fdiv <16 x float> <float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, %res1 1851 ret <16 x float> %res2 1852 } 1853 1854 1855 define <4 x double> @f4xf64_f128(<4 x double> %a) { 1856 ; AVX-LABEL: f4xf64_f128: 1857 ; AVX: # %bb.0: 1858 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1859 ; AVX-NEXT: # ymm1 = mem[0,1,0,1] 1860 ; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1861 ; AVX-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1862 ; AVX-NEXT: retl 1863 ; 1864 ; ALL32-LABEL: f4xf64_f128: 1865 ; ALL32: # %bb.0: 1866 ; ALL32-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1867 ; ALL32-NEXT: # ymm1 = mem[0,1,0,1] 1868 ; ALL32-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1869 ; ALL32-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1870 ; ALL32-NEXT: retl 1871 ; 1872 ; AVX-64-LABEL: f4xf64_f128: 1873 ; AVX-64: # %bb.0: 1874 ; AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1875 ; AVX-64-NEXT: # ymm1 = mem[0,1,0,1] 1876 
; AVX-64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1877 ; AVX-64-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1878 ; AVX-64-NEXT: retq 1879 ; 1880 ; ALL64-LABEL: f4xf64_f128: 1881 ; ALL64: # %bb.0: 1882 ; ALL64-NEXT: vbroadcastf128 {{.*#+}} ymm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1883 ; ALL64-NEXT: # ymm1 = mem[0,1,0,1] 1884 ; ALL64-NEXT: vaddpd %ymm1, %ymm0, %ymm0 1885 ; ALL64-NEXT: vdivpd %ymm0, %ymm1, %ymm0 1886 ; ALL64-NEXT: retq 1887 %res1 = fadd <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %a 1888 %res2 = fdiv <4 x double> <double 2.0, double 1.0, double 2.0, double 1.0>, %res1 1889 ret <4 x double> %res2 1890 } 1891 1892 1893 define <8 x double> @f8xf64_f128(<8 x double> %a) { 1894 ; AVX-LABEL: f8xf64_f128: 1895 ; AVX: # %bb.0: 1896 ; AVX-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1897 ; AVX-NEXT: # ymm2 = mem[0,1,0,1] 1898 ; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1899 ; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1900 ; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1901 ; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1902 ; AVX-NEXT: retl 1903 ; 1904 ; AVX2-LABEL: f8xf64_f128: 1905 ; AVX2: # %bb.0: 1906 ; AVX2-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1907 ; AVX2-NEXT: # ymm2 = mem[0,1,0,1] 1908 ; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1909 ; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1910 ; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1911 ; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1912 ; AVX2-NEXT: retl 1913 ; 1914 ; AVX512-LABEL: f8xf64_f128: 1915 ; AVX512: # %bb.0: 1916 ; AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1917 ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1918 ; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1919 ; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1920 ; AVX512-NEXT: retl 1921 ; 1922 ; AVX-64-LABEL: f8xf64_f128: 1923 ; AVX-64: # %bb.0: 1924 ; 
AVX-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1925 ; AVX-64-NEXT: # ymm2 = mem[0,1,0,1] 1926 ; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1927 ; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1928 ; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1929 ; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1930 ; AVX-64-NEXT: retq 1931 ; 1932 ; AVX2-64-LABEL: f8xf64_f128: 1933 ; AVX2-64: # %bb.0: 1934 ; AVX2-64-NEXT: vbroadcastf128 {{.*#+}} ymm2 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1935 ; AVX2-64-NEXT: # ymm2 = mem[0,1,0,1] 1936 ; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1937 ; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1938 ; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1939 ; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1940 ; AVX2-64-NEXT: retq 1941 ; 1942 ; AVX512F-64-LABEL: f8xf64_f128: 1943 ; AVX512F-64: # %bb.0: 1944 ; AVX512F-64-NEXT: vbroadcastf32x4 {{.*#+}} zmm1 = [2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00,2.000000e+00,1.000000e+00] 1945 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 1946 ; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1947 ; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1948 ; AVX512F-64-NEXT: retq 1949 %res1 = fadd <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %a 1950 %res2 = fdiv <8 x double> <double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0, double 2.0, double 1.0>, %res1 1951 ret <8 x double> %res2 1952 } 1953 1954 1955 ; AVX512: .LCPI37 1956 ; AVX512-NEXT: .quad 4616189618054758400 # double 4 1957 ; AVX512-NEXT: .quad 4607182418800017408 # double 1 1958 ; AVX512-NEXT: .quad 4611686018427387904 # double 2 1959 ; AVX512-NEXT: .quad 4613937818241073152 # double 3 1960 ; AVX512-NOT: .quad 1961 1962 define <8 x double> @f8xf64_f256(<8 x double> %a) { 1963 ; AVX-LABEL: f8xf64_f256: 1964 ; AVX: # %bb.0: 1965 ; AVX-NEXT: vmovapd {{.*#+}} ymm2 = 
[4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1966 ; AVX-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1967 ; AVX-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1968 ; AVX-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1969 ; AVX-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1970 ; AVX-NEXT: retl 1971 ; 1972 ; AVX2-LABEL: f8xf64_f256: 1973 ; AVX2: # %bb.0: 1974 ; AVX2-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1975 ; AVX2-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1976 ; AVX2-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1977 ; AVX2-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1978 ; AVX2-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1979 ; AVX2-NEXT: retl 1980 ; 1981 ; AVX512-LABEL: f8xf64_f256: 1982 ; AVX512: # %bb.0: 1983 ; AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1984 ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 1985 ; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0 1986 ; AVX512-NEXT: vdivpd %zmm0, %zmm1, %zmm0 1987 ; AVX512-NEXT: retl 1988 ; 1989 ; AVX-64-LABEL: f8xf64_f256: 1990 ; AVX-64: # %bb.0: 1991 ; AVX-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 1992 ; AVX-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 1993 ; AVX-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 1994 ; AVX-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 1995 ; AVX-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 1996 ; AVX-64-NEXT: retq 1997 ; 1998 ; AVX2-64-LABEL: f8xf64_f256: 1999 ; AVX2-64: # %bb.0: 2000 ; AVX2-64-NEXT: vmovapd {{.*#+}} ymm2 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 2001 ; AVX2-64-NEXT: vaddpd %ymm2, %ymm1, %ymm1 2002 ; AVX2-64-NEXT: vaddpd %ymm2, %ymm0, %ymm0 2003 ; AVX2-64-NEXT: vdivpd %ymm0, %ymm2, %ymm0 2004 ; AVX2-64-NEXT: vdivpd %ymm1, %ymm2, %ymm1 2005 ; AVX2-64-NEXT: retq 2006 ; 2007 ; AVX512F-64-LABEL: f8xf64_f256: 2008 ; AVX512F-64: # %bb.0: 2009 ; AVX512F-64-NEXT: vbroadcastf64x4 {{.*#+}} zmm1 = [4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00,4.000000e+00,1.000000e+00,2.000000e+00,3.000000e+00] 
2010 ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3] 2011 ; AVX512F-64-NEXT: vaddpd %zmm1, %zmm0, %zmm0 2012 ; AVX512F-64-NEXT: vdivpd %zmm0, %zmm1, %zmm0 2013 ; AVX512F-64-NEXT: retq 2014 %res1 = fadd <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %a 2015 %res2 = fdiv <8 x double> <double 4.0, double 1.0, double 2.0, double 3.0, double 4.0, double 1.0, double 2.0, double 3.0>, %res1 2016 ret <8 x double> %res2 2017 } 2018 2019 2020 2021 define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) { 2022 ; AVX-LABEL: f8xi16_i32_NaN: 2023 ; AVX: # %bb.0: 2024 ; AVX-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] 2025 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2026 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 2027 ; AVX-NEXT: retl 2028 ; 2029 ; ALL32-LABEL: f8xi16_i32_NaN: 2030 ; ALL32: # %bb.0: 2031 ; ALL32-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776] 2032 ; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2033 ; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0 2034 ; ALL32-NEXT: retl 2035 ; 2036 ; AVX-64-LABEL: f8xi16_i32_NaN: 2037 ; AVX-64: # %bb.0: 2038 ; AVX-64-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] 2039 ; AVX-64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2040 ; AVX-64-NEXT: vpand %xmm1, %xmm0, %xmm0 2041 ; AVX-64-NEXT: retq 2042 ; 2043 ; ALL64-LABEL: f8xi16_i32_NaN: 2044 ; ALL64: # %bb.0: 2045 ; ALL64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4290379776,4290379776,4290379776,4290379776] 2046 ; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 2047 ; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0 2048 ; ALL64-NEXT: retq 2049 %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a 2050 %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1 2051 ret <8 x i16> %res2 2052 } 2053