; RUN: opt < %s -instcombine -S | FileCheck %s

; InstCombine regression tests for the x86 SSE4A intrinsics extrq, extrqi,
; insertq and insertqi. Each function is followed by the output expected from
; the RUN line above; the short note in front of each function summarises what
; its operands and expected output demonstrate.

;
; EXTRQ
;

; Both operands are plain arguments: the call is expected to come out unchanged.
define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  ret <2 x i64> %1
}

; An all-zero first operand yields a zero low element and an undef high element.
define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_zero_arg0
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
  ret <2 x i64> %1
}

; An all-zero control vector makes the call collapse to its first operand.
define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_zero_arg1
; CHECK-NEXT: ret <2 x i64> %x
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
  ret <2 x i64> %1
}

; A constant control vector is rewritten to the immediate form: bytes 0 and 1
; of the vector (8 and 15) become the two i8 operands of extrqi.
define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_to_extqi
; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}

; Fully constant operands fold to a constant; the high input element (55) does
; not show up in the result, whose high element is undef.
define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_constant
; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}

; Same as above, but the high input element is already undef and the first
; control byte is 16 instead of 8.
define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
  ret <2 x i64> %1
}

;
; EXTRQI
;

; Variable input with immediates 8 and 23: the call is expected to be kept.
define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
  ret <2 x i64> %1
}

; Immediates (32, 32) turn into a byte shuffle: bytes 4-7 of the input followed
; by four zero bytes taken from a constant vector; the upper eight bytes are
; undef.
define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_shuffle_1zuu
; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %3
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
  ret <2 x i64> %1
}

; Immediates (8, 16) turn into a byte shuffle: byte 2 of the input followed by
; seven zero bytes; the upper eight bytes are undef.
define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
; CHECK-NEXT: ret <2 x i64> %3
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
  ret <2 x i64> %1
}

; Immediates (32, 33) give a fully undef result, presumably because the two
; together reach past bit 63 (32 + 33 = 65).
define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_undef
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
  ret <2 x i64> %1
}

; A zero input with immediates (3, 18) folds to a zero low element.
define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_zero
; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
  ret <2 x i64> %1
}

; An all-ones low element with immediates (3, 18) folds to 7.
define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_constant
; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
  ret <2 x i64> %1
}

; An all-ones low element (undef high element) with immediates (4, 18) folds
; to 15.
define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
  ret <2 x i64> %1
}

;
; INSERTQ
;

; Both operands are plain arguments: the call is expected to come out unchanged.
define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_call
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  ret <2 x i64> %1
}

; A constant second operand is rewritten to the immediate form: the high
; element 658 (0x292) is split into the i8 operands 18 and 2, and the high
; element of the vector operand becomes undef.
define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_to_insertqi
; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}

; Fully constant operands fold to a constant (8 placed into a zero value
; gives 32).
define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_constant
; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}

; Same fold with a low element of 1 and an undef high element in the first
; operand; bit 0 of the destination survives, giving 33.
define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_constant_undef
; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
  ret <2 x i64> %1
}

;
; INSERTQI
;

; Immediates (32, 32) become a two-input byte shuffle: bytes 0-3 of %v followed
; by bytes 0-3 of %i; the surrounding bitcasts disappear.
define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
; CHECK-LABEL: @test_insertqi_shuffle_04uu
; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i8> %1
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
  %4 = bitcast <2 x i64> %3 to <16 x i8>
  ret <16 x i8> %4
}

; Immediates (16, 0) become a two-input byte shuffle: bytes 0-1 of %i followed
; by bytes 2-7 of %v.
define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: ret <16 x i8> %1
  %1 = bitcast <16 x i8> %v to <2 x i64>
  %2 = bitcast <16 x i8> %i to <2 x i64>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
  %4 = bitcast <2 x i64> %3 to <16 x i8>
  ret <16 x i8> %4
}

; Fully constant operands with immediates (16, 1) fold to a constant.
define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @test_insertqi_constant
; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
  ret <2 x i64> %1
}

; Here the insert simply yields the second argument: the top 64 bits of the
; result are undefined, and the bottom 64 bits are copied from the second
; argument.
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testInsert64Bits
; CHECK-NEXT: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
  ret <2 x i64> %1
}

; Immediates (0, 0) give the same result as (64, 0) above: the second argument.
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testZeroLength
; CHECK-NEXT: ret <2 x i64> %i
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
  ret <2 x i64> %1
}

; Immediates (0, 16) are expected to fold to undef.
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_1
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
  ret <2 x i64> %1
}

; Immediates (48, 32) are expected to fold to undef.
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_2
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
  ret <2 x i64> %1
}

; Immediates (64, 16) are expected to fold to undef.
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
; CHECK-LABEL: @testUndefinedInsertq_3
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
  ret <2 x i64> %1
}

;
; Vector Demanded Bits
;

; A shuffle that only rewrites element 1 of the first operand is expected to
; be dropped, leaving %x passed directly.
define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
  ret <2 x i64> %2
}

; A shuffle that only rewrites bytes 8-15 of the control vector is expected to
; be dropped, leaving %y passed directly.
define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_arg1
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
  ret <2 x i64> %2
}

; Both of the shuffles above applied at once; both are expected to be dropped.
define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_args01
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
  ret <2 x i64> %3
}

; Only element 1 of the call's result is read; the whole function is expected
; to fold to undef.
define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
; CHECK-LABEL: @test_extrq_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}

; Immediate form: a shuffle that only rewrites element 1 of the input is
; expected to be dropped.
define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
  ret <2 x i64> %2
}

; Immediate form: only element 1 of the result is read, so the function is
; expected to fold to undef.
define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
; CHECK-LABEL: @test_extrqi_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}

; A shuffle that only rewrites element 1 of insertq's first operand is
; expected to be dropped.
define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
  ret <2 x i64> %2
}

; Only element 1 of insertq's result is read; the function is expected to fold
; to undef.
define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertq_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}

; A shuffle that only rewrites element 1 of insertqi's first operand is
; expected to be dropped.
define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_arg0
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
  ret <2 x i64> %2
}

; A shuffle that only rewrites element 1 of insertqi's second operand is
; expected to be dropped.
define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_arg1
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
  ret <2 x i64> %2
}

; Both insertqi vector operands go through such a shuffle; both shuffles are
; expected to be dropped.
define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_args01
; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
; CHECK-NEXT: ret <2 x i64> %1
  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
  ret <2 x i64> %3
}

; Only element 1 of insertqi's result is read; the function is expected to
; fold to undef.
define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: @test_insertqi_ret
; CHECK-NEXT: ret <2 x i64> undef
  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %2
}

; Intrinsic declarations; each one is also expected to appear in the output.

; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind

; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind