; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

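; Signed shift right by immediate: an ashr of a vector by a splat constant
; should select SSHR, for both 64-bit and 128-bit register widths.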
define <8 x i8> @test_vshr_n_s8(<8 x i8> %a) {
; CHECK-LABEL: test_vshr_n_s8
; CHECK: sshr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = ashr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vshr_n_s16
; CHECK: sshr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = ashr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_s32(<2 x i32> %a) {
; CHECK-LABEL: test_vshr_n_s32
; CHECK: sshr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = ashr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshrq_n_s8
; CHECK: sshr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = ashr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrq_n_s16
; CHECK: sshr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrq_n_s32
; CHECK: sshr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = ashr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_s64(<2 x i64> %a) {
; CHECK-LABEL: test_vshrq_n_s64
; CHECK: sshr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = ashr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

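; Unsigned shift right by immediate: the same patterns with lshr select USHR.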
define <8 x i8> @test_vshr_n_u8(<8 x i8> %a) {
; CHECK-LABEL: test_vshr_n_u8
; CHECK: ushr {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vshr_n = lshr <8 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <8 x i8> %vshr_n
}

define <4 x i16> @test_vshr_n_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vshr_n_u16
; CHECK: ushr {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vshr_n = lshr <4 x i16> %a, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %vshr_n
}

define <2 x i32> @test_vshr_n_u32(<2 x i32> %a) {
; CHECK-LABEL: test_vshr_n_u32
; CHECK: ushr {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vshr_n = lshr <2 x i32> %a, <i32 3, i32 3>
  ret <2 x i32> %vshr_n
}

define <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshrq_n_u8
; CHECK: ushr {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vshr_n = lshr <16 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  ret <16 x i8> %vshr_n
}

define <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrq_n_u16
; CHECK: ushr {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vshr_n = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %vshr_n
}

define <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrq_n_u32
; CHECK: ushr {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vshr_n = lshr <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %vshr_n
}

define <2 x i64> @test_vshrq_n_u64(<2 x i64> %a) {
; CHECK-LABEL: test_vshrq_n_u64
; CHECK: ushr {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vshr_n = lshr <2 x i64> %a, <i64 3, i64 3>
  ret <2 x i64> %vshr_n
}

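; Signed shift-right accumulate: an ashr by a splat constant feeding an add
; should fold into a single SSRA.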
define <8 x i8> @test_vsra_n_s8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsra_n_s8
; CHECK: ssra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = ashr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_s16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsra_n_s16
; CHECK: ssra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = ashr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_s32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsra_n_s32
; CHECK: ssra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = ashr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_s8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsraq_n_s8
; CHECK: ssra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = ashr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_s16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsraq_n_s16
; CHECK: ssra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_s32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsraq_n_s32
; CHECK: ssra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = ashr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_s64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsraq_n_s64
; CHECK: ssra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = ashr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

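; Unsigned shift-right accumulate: lshr feeding an add folds into USRA.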
define <8 x i8> @test_vsra_n_u8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: test_vsra_n_u8
; CHECK: usra {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, #3
  %vsra_n = lshr <8 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <8 x i8> %vsra_n, %a
  ret <8 x i8> %1
}

define <4 x i16> @test_vsra_n_u16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: test_vsra_n_u16
; CHECK: usra {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, #3
  %vsra_n = lshr <4 x i16> %b, <i16 3, i16 3, i16 3, i16 3>
  %1 = add <4 x i16> %vsra_n, %a
  ret <4 x i16> %1
}

define <2 x i32> @test_vsra_n_u32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: test_vsra_n_u32
; CHECK: usra {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, #3
  %vsra_n = lshr <2 x i32> %b, <i32 3, i32 3>
  %1 = add <2 x i32> %vsra_n, %a
  ret <2 x i32> %1
}

define <16 x i8> @test_vsraq_n_u8(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: test_vsraq_n_u8
; CHECK: usra {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #3
  %vsra_n = lshr <16 x i8> %b, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %1 = add <16 x i8> %vsra_n, %a
  ret <16 x i8> %1
}

define <8 x i16> @test_vsraq_n_u16(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vsraq_n_u16
; CHECK: usra {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, #3
  %vsra_n = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %1 = add <8 x i16> %vsra_n, %a
  ret <8 x i16> %1
}

define <4 x i32> @test_vsraq_n_u32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vsraq_n_u32
; CHECK: usra {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, #3
  %vsra_n = lshr <4 x i32> %b, <i32 3, i32 3, i32 3, i32 3>
  %1 = add <4 x i32> %vsra_n, %a
  ret <4 x i32> %1
}

define <2 x i64> @test_vsraq_n_u64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vsraq_n_u64
; CHECK: usra {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, #3
  %vsra_n = lshr <2 x i64> %b, <i64 3, i64 3>
  %1 = add <2 x i64> %vsra_n, %a
  ret <2 x i64> %1
}

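; Narrowing shift right: a shift by a splat constant followed by a trunc to
; the half-width element type selects SHRN. Signed and unsigned inputs give
; the same instruction, since only the discarded high bits differ between
; ashr and lshr for these shift amounts.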
define <8 x i8> @test_vshrn_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrn_n_s16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrn_n_s32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_s64(<2 x i64> %a) {
; CHECK-LABEL: test_vshrn_n_s64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = ashr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

define <8 x i8> @test_vshrn_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrn_n_u16
; CHECK: shrn {{v[0-9]+}}.8b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  ret <8 x i8> %vshrn_n
}

define <4 x i16> @test_vshrn_n_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrn_n_u32
; CHECK: shrn {{v[0-9]+}}.4h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %a, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  ret <4 x i16> %vshrn_n
}

define <2 x i32> @test_vshrn_n_u64(<2 x i64> %a) {
; CHECK-LABEL: test_vshrn_n_u64
; CHECK: shrn {{v[0-9]+}}.2s, {{v[0-9]+}}.2d, #19
  %1 = lshr <2 x i64> %a, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %1 to <2 x i32>
  ret <2 x i32> %vshrn_n
}

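; Narrowing shift right, high half: the narrowed result concatenated onto an
; existing 64-bit vector (expressed below as bitcasts plus a shufflevector)
; selects SHRN2, which writes the upper half of the destination register.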
define <16 x i8> @test_vshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrn_high_n_s16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = ashr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrn_high_n_s32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = ashr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vshrn_high_n_s64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = ashr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

define <16 x i8> @test_vshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrn_high_n_u16
; CHECK: shrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %1 = lshr <8 x i16> %b, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %2 = bitcast <8 x i8> %a to <1 x i64>
  %3 = bitcast <8 x i8> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %4
}

define <8 x i16> @test_vshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrn_high_n_u32
; CHECK: shrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %1 = lshr <4 x i32> %b, <i32 9, i32 9, i32 9, i32 9>
  %vshrn_n = trunc <4 x i32> %1 to <4 x i16>
  %2 = bitcast <4 x i16> %a to <1 x i64>
  %3 = bitcast <4 x i16> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %2, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %4
}

define <4 x i32> @test_vshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vshrn_high_n_u64
; CHECK: shrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %2 = lshr <2 x i64> %b, <i64 19, i64 19>
  %vshrn_n = trunc <2 x i64> %2 to <2 x i32>
  %3 = bitcast <2 x i32> %vshrn_n to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %3, <2 x i32> <i32 0, i32 1>
  %4 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %4
}

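; The remaining narrowing tests use the llvm.aarch64.neon.* intrinsics for
; the low half and the same concatenation pattern, which should select the
; paired high-half instruction. First: signed saturating shift right
; unsigned narrow (SQSHRUN2).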
define <16 x i8> @test_vqshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrun_high_n_s16
; CHECK: sqshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrun = tail call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrun_high_n_s32
; CHECK: sqshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrun = tail call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqshrun_high_n_s64
; CHECK: sqshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrun = tail call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

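; Rounding shift right narrow, high half (RSHRN2).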
define <16 x i8> @test_vrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrn_high_n_s16
; CHECK: rshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vrshrn = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrn_high_n_s32
; CHECK: rshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vrshrn = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vrshrn_high_n_s64
; CHECK: rshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vrshrn = tail call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

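; Signed saturating rounding shift right unsigned narrow, high half (SQRSHRUN2).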
define <16 x i8> @test_vqrshrun_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrun_high_n_s16
; CHECK: sqrshrun2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrun = tail call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrun_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrun_high_n_s32
; CHECK: sqrshrun2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrun = tail call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrun_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqrshrun_high_n_s64
; CHECK: sqrshrun2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrun = tail call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrun to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

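; Signed saturating shift right narrow, high half (SQSHRN2).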
define <16 x i8> @test_vqshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrn_high_n_s16
; CHECK: sqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrn_high_n_s32
; CHECK: sqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqshrn_high_n_s64
; CHECK: sqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

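; Unsigned saturating shift right narrow, high half (UQSHRN2).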
define <16 x i8> @test_vqshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrn_high_n_u16
; CHECK: uqshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqshrn = tail call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrn_high_n_u32
; CHECK: uqshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqshrn = tail call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqshrn_high_n_u64
; CHECK: uqshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqshrn = tail call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

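; Signed saturating rounding shift right narrow, high half (SQRSHRN2).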
define <16 x i8> @test_vqrshrn_high_n_s16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_s16
; CHECK: sqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_s32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_s32
; CHECK: sqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_s64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_s64
; CHECK: sqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

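; Unsigned saturating rounding shift right narrow, high half (UQRSHRN2).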
define <16 x i8> @test_vqrshrn_high_n_u16(<8 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_u16
; CHECK: uqrshrn2 {{v[0-9]+}}.16b, {{v[0-9]+}}.8h, #3
  %vqrshrn = tail call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %b, i32 3)
  %1 = bitcast <8 x i8> %a to <1 x i64>
  %2 = bitcast <8 x i8> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <16 x i8>
  ret <16 x i8> %3
}

define <8 x i16> @test_vqrshrn_high_n_u32(<4 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_u32
; CHECK: uqrshrn2 {{v[0-9]+}}.8h, {{v[0-9]+}}.4s, #9
  %vqrshrn = tail call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %b, i32 9)
  %1 = bitcast <4 x i16> %a to <1 x i64>
  %2 = bitcast <4 x i16> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <8 x i16>
  ret <8 x i16> %3
}

define <4 x i32> @test_vqrshrn_high_n_u64(<2 x i32> %a, <2 x i64> %b) {
; CHECK-LABEL: test_vqrshrn_high_n_u64
; CHECK: uqrshrn2 {{v[0-9]+}}.4s, {{v[0-9]+}}.2d, #19
  %1 = bitcast <2 x i32> %a to <1 x i64>
  %vqrshrn = tail call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %b, i32 19)
  %2 = bitcast <2 x i32> %vqrshrn to <1 x i64>
  %shuffle.i = shufflevector <1 x i64> %1, <1 x i64> %2, <2 x i32> <i32 0, i32 1>
  %3 = bitcast <2 x i64> %shuffle.i to <4 x i32>
  ret <4 x i32> %3
}

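; Intrinsic declarations, grouped by family.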
declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32)

declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32)
declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32>, i32)
declare <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32>, i32)
declare <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double>, i32)

declare <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float>, i32)
declare <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float>, i32)
declare <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double>, i32)

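; Fixed-point conversions on <1 x i64>/<1 x double>: the v1i64/v1f64
; conversion intrinsics with an fbits operand (#64 here) should map onto the
; scalar fcvtzs/fcvtzu/scvtf/ucvtf forms.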
define <1 x i64> @test_vcvt_n_s64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_s64_f64
; CHECK: fcvtzs d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x i64> @test_vcvt_n_u64_f64(<1 x double> %a) {
; CHECK-LABEL: test_vcvt_n_u64_f64
; CHECK: fcvtzu d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> %a, i32 64)
  ret <1 x i64> %1
}

define <1 x double> @test_vcvt_n_f64_s64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_s64
; CHECK: scvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

define <1 x double> @test_vcvt_n_f64_u64(<1 x i64> %a) {
; CHECK-LABEL: test_vcvt_n_f64_u64
; CHECK: ucvtf d{{[0-9]+}}, d{{[0-9]+}}, #64
  %1 = tail call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> %a, i32 64)
  ret <1 x double> %1
}

declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double>, i32)
declare <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64>, i32)
declare <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64>, i32)