; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s
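
; Tests for AArch64 NEON shift-by-immediate codegen: plain and accumulating
; right shifts expressed as generic IR, narrowing shifts (low and high half),
; the saturating/rounding narrowing intrinsics, and fixed-point conversions.

; Plain right shifts: an ashr/lshr by a splat constant should select
; sshr/ushr with the immediate shift amount.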
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}
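
; Shift-right-and-accumulate: an ashr/lshr feeding an add should be
; combined into ssra/usra.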
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}
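
; Narrowing shifts: an ashr/lshr followed by a trunc to the half-width
; element type should select shrn.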
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}
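
; Narrowing shifts into the high half: the shrn pattern concatenated with
; the existing low half (via bitcasts and a shufflevector) should select
; shrn2.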
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}
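
; Saturating and rounding narrowing shifts into the high half are only
; expressible through intrinsics; each should select the corresponding *2
; instruction (sqshrun2, rshrn2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2,
; uqrshrn2).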
define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}
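
; Intrinsic declarations.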
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)
declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)
declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)
declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)
declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)
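
; Scalar fixed-point conversions on <1 x i64>/<1 x double>: the vcvt*_n
; intrinsics with a #64 fractional-bits immediate should select
; fcvtzs/fcvtzu/scvtf/ucvtf on the d registers.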
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)