; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;
; InstCombine folds for the X86 XOP intrinsics exercised below:
;  * @llvm.x86.xop.vfrcz.sd / .ss compute only element 0; the checks show the
;    upper elements behave as pass-through, so constant inserts into them are
;    removed when element 0 is extracted, and extracting an upper element
;    folds directly to the inserted constant.
;  * The @llvm.x86.xop.vpcom* fixed-predicate comparisons fold to a generic
;    "icmp <pred> + sext" pair, and the always-true / always-false variants
;    fold to an all-ones vector / zeroinitializer.
; Do not hand-edit the CHECK lines; regenerate with update_test_checks.py.

; Extract element 0: the inserts of constants into the upper element are dead
; and are removed, leaving only the insert of %a.
define double @test_vfrcz_sd_0(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
; CHECK-NEXT:    ret double [[TMP3]]
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 0
  ret double %4
}

; Extract element 1 (pass-through lane): the whole expression folds to the
; constant that was inserted into that lane.
define double @test_vfrcz_sd_1(double %a) {
; CHECK-LABEL: @test_vfrcz_sd_1(
; CHECK-NEXT:    ret double 1.000000e+00
;
  %1 = insertelement <2 x double> undef, double %a, i32 0
  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
  %4 = extractelement <2 x double> %3, i32 1
  ret double %4
}

; Same as test_vfrcz_sd_0, but for the <4 x float> .ss variant: all three
; upper-element constant inserts are removed.
define float @test_vfrcz_ss_0(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_0(
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
; CHECK-NEXT:    ret float [[TMP3]]
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 0
  ret float %6
}

; Same as test_vfrcz_sd_1, but for the .ss variant: extracting pass-through
; element 3 folds to the constant 3.0 inserted there.
define float @test_vfrcz_ss_3(float %a) {
; CHECK-LABEL: @test_vfrcz_ss_3(
; CHECK-NEXT:    ret float 3.000000e+00
;
  %1 = insertelement <4 x float> undef, float %a, i32 0
  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
  %6 = extractelement <4 x float> %5, i32 3
  ret float %6
}

; vpcomlt / vpcomltu (signed / unsigned less-than) -> icmp slt / ult + sext.
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_slt_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ult_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

; vpcomle / vpcomleu (signed / unsigned less-or-equal) -> icmp sle / ule + sext.
define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_sle_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @cmp_ule_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
;
  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
  ret <2 x i64> %1
}

; vpcomgt / vpcomgtu (signed / unsigned greater-than) -> icmp sgt / ugt + sext.
define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sgt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_ugt_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

; vpcomge / vpcomgeu (signed / unsigned greater-or-equal) -> icmp sge / uge + sext.
define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_sge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @cmp_uge_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
  ret <4 x i32> %1
}

; vpcomeq / vpcomequ: equality has no signedness, so both variants fold to the
; same "icmp eq" form.
define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_seq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_ueq_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

; vpcomne / vpcomneu: likewise, both fold to the same "icmp ne" form.
define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_sne_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: @cmp_une_v8i16(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
;
  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
  ret <8 x i16> %1
}

; vpcomtrue / vpcomtrueu: always-true predicate folds to an all-ones constant.
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_strue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_utrue_v16i8(
; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

; vpcomfalse / vpcomfalseu: always-false predicate folds to zeroinitializer.
define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_sfalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: @cmp_ufalse_v16i8(
; CHECK-NEXT:    ret <16 x i8> zeroinitializer
;
  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %1
}

; Intrinsic declarations. All element widths are declared for each predicate
; family; the tests above exercise one representative width per family.
declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone

declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone