; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; -----------------------------------------------------------------------------
; Unary scalar intrinsics: rcp.ss, sqrt.ss, rsqrt.ss.
;
; Every test builds the operand as <%a, 1.0, 2.0, 3.0>, calls the intrinsic and
; extracts a single lane of the result.
;   * "_0" variants read lane 0: the expected output keeps the call but feeds
;     it a vector where only lane 0 has been inserted.
;   * "_N" variants read lane N (N > 0): the expected output is the constant
;     that was inserted into lane N of the operand, with no call left.
; -----------------------------------------------------------------------------

; rcp.ss, result lane 0 used: inserts into lanes 1-3 are expected to vanish.
define float @test_rcp_ss_0(float %a) {
; CHECK-LABEL: @test_rcp_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; rcp.ss, result lane 1 used: expected to fold to the lane-1 constant (1.0).
define float @test_rcp_ss_1(float %a) {
; CHECK-LABEL: @test_rcp_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 1
  ret float %elt
}

; sqrt.ss, result lane 0 used: inserts into lanes 1-3 are expected to vanish.
define float @test_sqrt_ss_0(float %a) {
; CHECK-LABEL: @test_sqrt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; sqrt.ss, result lane 2 used: expected to fold to the lane-2 constant (2.0).
define float @test_sqrt_ss_2(float %a) {
; CHECK-LABEL: @test_sqrt_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 2
  ret float %elt
}

; rsqrt.ss, result lane 0 used: inserts into lanes 1-3 are expected to vanish.
define float @test_rsqrt_ss_0(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT: ret float [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; rsqrt.ss, result lane 3 used: expected to fold to the lane-3 constant (3.0).
define float @test_rsqrt_ss_3(float %a) {
; CHECK-LABEL: @test_rsqrt_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %x3)
  %elt = extractelement <4 x float> %res, i32 3
  ret float %elt
}

; -----------------------------------------------------------------------------
; Binary scalar intrinsics: add/sub/mul/div/min/max/cmp .ss
;
; Three shapes per intrinsic:
;   * test_OP_ss    - whole result returned; constants inserted into lanes 1-3
;                     of the second operand are expected to be dropped so that
;                     %b is passed straight through.
;   * test_OP_ss_0  - operands are <%a, 1, 2, 3> and <%b, 4, 5, 6>, lane 0 of
;                     the result is read. For add/sub/mul/div the expected
;                     output is a plain scalar fadd/fsub/fmul/fdiv; for
;                     min/max/cmp the call stays but only the lane-0 inserts
;                     survive.
;   * test_OP_ss_N  - lane N (N > 0) of the result is read; the expected
;                     output is the constant placed in lane N of the first
;                     operand.
; -----------------------------------------------------------------------------

; add.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_add_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; add.ss, result lane 0 used: expected to become a scalar fadd of %a and %b.
define float @test_add_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b
; CHECK-NEXT: ret float [[TMP1]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; add.ss, result lane 1 used: expected to fold to the lane-1 constant (1.0).
define float @test_add_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_add_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 1
  ret float %elt
}

; sub.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_sub_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; sub.ss, result lane 0 used: expected to become a scalar fsub of %a and %b.
define float @test_sub_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b
; CHECK-NEXT: ret float [[TMP1]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; sub.ss, result lane 2 used: expected to fold to the lane-2 constant (2.0).
define float @test_sub_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_sub_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 2
  ret float %elt
}

; mul.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_mul_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; mul.ss, result lane 0 used: expected to become a scalar fmul of %a and %b.
define float @test_mul_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b
; CHECK-NEXT: ret float [[TMP1]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; mul.ss, result lane 3 used: expected to fold to the lane-3 constant (3.0).
define float @test_mul_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_mul_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 3
  ret float %elt
}

; div.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_div_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; div.ss, result lane 0 used: expected to become a scalar fdiv of %a by %b.
define float @test_div_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b
; CHECK-NEXT: ret float [[TMP1]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; div.ss, result lane 1 used: expected to fold to the lane-1 constant (1.0).
define float @test_div_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_div_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 1
  ret float %elt
}

; min.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_min_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; min.ss, result lane 0 used: the call is expected to stay, fed by operands
; that carry only their lane-0 insert.
define float @test_min_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[TMP4]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; min.ss, result lane 2 used: expected to fold to the lane-2 constant (2.0).
define float @test_min_ss_2(float %a, float %b) {
; CHECK-LABEL: @test_min_ss_2(
; CHECK-NEXT: ret float 2.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 2
  ret float %elt
}

; max.ss, full vector result: upper-lane inserts on %b are expected to vanish.
define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_max_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %y3)
  ret <4 x float> %res
}

; max.ss, result lane 0 used: the call is expected to stay, fed by operands
; that carry only their lane-0 insert.
define float @test_max_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[TMP4]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %x3, <4 x float> %y3)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; max.ss, result lane 3 used: expected to fold to the lane-3 constant (3.0).
define float @test_max_ss_3(float %a, float %b) {
; CHECK-LABEL: @test_max_ss_3(
; CHECK-NEXT: ret float 3.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %x3, <4 x float> %y0)
  %elt = extractelement <4 x float> %res, i32 3
  ret float %elt
}

; cmp.ss (predicate i8 0), full vector result: upper-lane inserts on %b are
; expected to vanish.
define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @test_cmp_ss(
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
  %y1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 2.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 3.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %y3, i8 0)
  ret <4 x float> %res
}

; cmp.ss (predicate i8 0), result lane 0 used: the call is expected to stay,
; fed by operands that carry only their lane-0 insert.
define float @test_cmp_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
; CHECK-NEXT: ret float [[R]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %x3, <4 x float> %y3, i8 0)
  %elt = extractelement <4 x float> %res, i32 0
  ret float %elt
}

; cmp.ss (predicate i8 0), result lane 1 used: expected to fold to the lane-1
; constant (1.0).
define float @test_cmp_ss_1(float %a, float %b) {
; CHECK-LABEL: @test_cmp_ss_1(
; CHECK-NEXT: ret float 1.000000e+00
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %res = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %x3, <4 x float> %y0, i8 0)
  %elt = extractelement <4 x float> %res, i32 1
  ret float %elt
}

; -----------------------------------------------------------------------------
; Scalar compare intrinsics returning i32: comi*.ss and ucomi*.ss
;
; All tests below share one shape: operands are <%a, 1, 2, 3> and <%b, 4, 5, 6>
; and the i32 result is returned directly. The expected output keeps the call
; but with operands that carry only their lane-0 insert.
; -----------------------------------------------------------------------------

define i32 @test_comieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comieq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_comige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comige_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_comigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comigt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_comile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comile_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_comilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comilt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_comineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_comineq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomieq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomieq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomige_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomige_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomigt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomigt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomile_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomile_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomilt_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomilt_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

define i32 @test_ucomineq_ss_0(float %a, float %b) {
; CHECK-LABEL: @test_ucomineq_ss_0(
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
; CHECK-NEXT: ret i32 [[TMP3]]
;
  %x0 = insertelement <4 x float> undef, float %a, i32 0
  %x1 = insertelement <4 x float> %x0, float 1.000000e+00, i32 1
  %x2 = insertelement <4 x float> %x1, float 2.000000e+00, i32 2
  %x3 = insertelement <4 x float> %x2, float 3.000000e+00, i32 3
  %y0 = insertelement <4 x float> undef, float %b, i32 0
  %y1 = insertelement <4 x float> %y0, float 4.000000e+00, i32 1
  %y2 = insertelement <4 x float> %y1, float 5.000000e+00, i32 2
  %y3 = insertelement <4 x float> %y2, float 6.000000e+00, i32 3
  %res = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %x3, <4 x float> %y3)
  ret i32 %res
}

; Intrinsic declarations: unary <4 x float> -> <4 x float>.
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)

; Intrinsic declarations: binary ops returning <4 x float> (cmp.ss takes an
; extra i8 argument).
declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)

; Intrinsic declarations: comi*.ss compares returning i32.
declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)

; Intrinsic declarations: ucomi*.ss compares returning i32.
declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)