Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
      4 
      ; Masked 512-bit variable-rotate intrinsics exercised by this file
      ; (prolv = rotate left, prorv = rotate right; .d = 32-bit lanes, .q = 64-bit lanes).
      ; As the tests below use them, the operands are: value to rotate, per-lane
      ; rotate amounts, passthrough vector, lane mask (i16 for 16 lanes, i8 for 8 lanes).
      5 declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
      6 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
      7 declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
      8 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
      9 
     ; Zero-masking vpermt2var intrinsics. In this file they are only called with
     ; constant operands, to feed the rotates in the constant-folding tests.
     10 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
     11 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
     12 
     13 ; Tests showing replacement of variable rotates with immediate splat versions.
     14 
     ; Rotate-left of the <16 x i32> value %x0 by a constant splat of 5, issued
     ; through the variable-rotate intrinsic in three masking modes. The expected
     ; output for both CPUs uses the immediate form (vprold $5) for all three calls;
     ; the two run lines differ only in the mask move (kmovw vs. kmovd).
     15 define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
     16 ; KNL-LABEL: test_splat_rol_v16i32:
     17 ; KNL:       # %bb.0:
     18 ; KNL-NEXT:    vprold $5, %zmm0, %zmm2
     19 ; KNL-NEXT:    kmovw %edi, %k1
     20 ; KNL-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
     21 ; KNL-NEXT:    vprold $5, %zmm0, %zmm0 {%k1} {z}
     22 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
     23 ; KNL-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
     24 ; KNL-NEXT:    retq
     25 ;
     26 ; SKX-LABEL: test_splat_rol_v16i32:
     27 ; SKX:       # %bb.0:
     28 ; SKX-NEXT:    vprold $5, %zmm0, %zmm2
     29 ; SKX-NEXT:    kmovd %edi, %k1
     30 ; SKX-NEXT:    vprold $5, %zmm0, %zmm1 {%k1}
     31 ; SKX-NEXT:    vprold $5, %zmm0, %zmm0 {%k1} {z}
     32 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
     33 ; SKX-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
     34 ; SKX-NEXT:    retq
          ; Merge-masking: passthrough %x1, mask %x2.
     35   %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
          ; Zero-masking: zeroinitializer passthrough, mask %x2.
     36   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
          ; Unmasked: all-ones mask (-1).
     37   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
          ; Sum the three results so that every call contributes to the return value.
     38   %res3 = add <16 x i32> %res, %res1
     39   %res4 = add <16 x i32> %res3, %res2
     40   ret <16 x i32> %res4
     41 }
     42 
     ; Rotate-left of the <8 x i64> value %x0 by a constant splat of 5, issued
     ; through the variable-rotate intrinsic in three masking modes. The expected
     ; output for both CPUs uses the immediate form (vprolq $5) for all three calls.
     43 define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
     44 ; KNL-LABEL: test_splat_rol_v8i64:
     45 ; KNL:       # %bb.0:
     46 ; KNL-NEXT:    vprolq $5, %zmm0, %zmm2
     47 ; KNL-NEXT:    kmovw %edi, %k1
     48 ; KNL-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
     49 ; KNL-NEXT:    vprolq $5, %zmm0, %zmm0 {%k1} {z}
     50 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     51 ; KNL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
     52 ; KNL-NEXT:    retq
     53 ;
     54 ; SKX-LABEL: test_splat_rol_v8i64:
     55 ; SKX:       # %bb.0:
     56 ; SKX-NEXT:    vprolq $5, %zmm0, %zmm2
     57 ; SKX-NEXT:    kmovd %edi, %k1
     58 ; SKX-NEXT:    vprolq $5, %zmm0, %zmm1 {%k1}
     59 ; SKX-NEXT:    vprolq $5, %zmm0, %zmm0 {%k1} {z}
     60 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
     61 ; SKX-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
     62 ; SKX-NEXT:    retq
          ; Merge-masking: passthrough %x1, mask %x2.
     63   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
          ; Zero-masking: zeroinitializer passthrough, mask %x2.
     64   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
          ; Unmasked: all-ones mask (-1).
     65   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
          ; Sum the three results so that every call contributes to the return value.
     66   %res3 = add <8 x i64> %res, %res1
     67   %res4 = add <8 x i64> %res3, %res2
     68   ret <8 x i64> %res4
     69 }
     70 
     ; Rotate-right of the <16 x i32> value %x0 by a constant splat of 5, issued
     ; through the variable-rotate intrinsic in three masking modes. The expected
     ; output for both CPUs uses the immediate form (vprord $5) for all three calls.
     71 define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
     72 ; KNL-LABEL: test_splat_ror_v16i32:
     73 ; KNL:       # %bb.0:
     74 ; KNL-NEXT:    vprord $5, %zmm0, %zmm2
     75 ; KNL-NEXT:    kmovw %edi, %k1
     76 ; KNL-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
     77 ; KNL-NEXT:    vprord $5, %zmm0, %zmm0 {%k1} {z}
     78 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
     79 ; KNL-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
     80 ; KNL-NEXT:    retq
     81 ;
     82 ; SKX-LABEL: test_splat_ror_v16i32:
     83 ; SKX:       # %bb.0:
     84 ; SKX-NEXT:    vprord $5, %zmm0, %zmm2
     85 ; SKX-NEXT:    kmovd %edi, %k1
     86 ; SKX-NEXT:    vprord $5, %zmm0, %zmm1 {%k1}
     87 ; SKX-NEXT:    vprord $5, %zmm0, %zmm0 {%k1} {z}
     88 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
     89 ; SKX-NEXT:    vpaddd %zmm2, %zmm0, %zmm0
     90 ; SKX-NEXT:    retq
          ; Merge-masking: passthrough %x1, mask %x2.
     91   %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
          ; Zero-masking: zeroinitializer passthrough, mask %x2.
     92   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> zeroinitializer, i16 %x2)
          ; Unmasked: all-ones mask (-1).
     93   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 -1)
          ; Sum the three results so that every call contributes to the return value.
     94   %res3 = add <16 x i32> %res, %res1
     95   %res4 = add <16 x i32> %res3, %res2
     96   ret <16 x i32> %res4
     97 }
     98 
     ; Rotate-right of the <8 x i64> value %x0 by a constant splat of 5, issued
     ; through the variable-rotate intrinsic in three masking modes. The expected
     ; output for both CPUs uses the immediate form (vprorq $5) for all three calls.
     99 define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
    100 ; KNL-LABEL: test_splat_ror_v8i64:
    101 ; KNL:       # %bb.0:
    102 ; KNL-NEXT:    vprorq $5, %zmm0, %zmm2
    103 ; KNL-NEXT:    kmovw %edi, %k1
    104 ; KNL-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
    105 ; KNL-NEXT:    vprorq $5, %zmm0, %zmm0 {%k1} {z}
    106 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    107 ; KNL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
    108 ; KNL-NEXT:    retq
    109 ;
    110 ; SKX-LABEL: test_splat_ror_v8i64:
    111 ; SKX:       # %bb.0:
    112 ; SKX-NEXT:    vprorq $5, %zmm0, %zmm2
    113 ; SKX-NEXT:    kmovd %edi, %k1
    114 ; SKX-NEXT:    vprorq $5, %zmm0, %zmm1 {%k1}
    115 ; SKX-NEXT:    vprorq $5, %zmm0, %zmm0 {%k1} {z}
    116 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    117 ; SKX-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
    118 ; SKX-NEXT:    retq
          ; Merge-masking: passthrough %x1, mask %x2.
    119   %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
          ; Zero-masking: zeroinitializer passthrough, mask %x2.
    120   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> zeroinitializer, i8 %x2)
          ; Unmasked: all-ones mask (-1).
    121   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 -1)
          ; Sum the three results so that every call contributes to the return value.
    122   %res3 = add <8 x i64> %res, %res1
    123   %res4 = add <8 x i64> %res3, %res2
    124   ret <8 x i64> %res4
    125 }
    126 
    127 ; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
    128 
    ; Rotate-left of <16 x i32> %x0 by splat amounts that lie outside 0..31.
    ; The expected immediates are the amounts reduced modulo 32:
    ; 33 becomes $1, -1 becomes $31 and 65534 becomes $30.
    129 define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
    130 ; KNL-LABEL: test_splat_bounds_rol_v16i32:
    131 ; KNL:       # %bb.0:
    132 ; KNL-NEXT:    kmovw %edi, %k1
    133 ; KNL-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
    134 ; KNL-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
    135 ; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
    136 ; KNL-NEXT:    vprold $30, %zmm0, %zmm0
    137 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    138 ; KNL-NEXT:    retq
    139 ;
    140 ; SKX-LABEL: test_splat_bounds_rol_v16i32:
    141 ; SKX:       # %bb.0:
    142 ; SKX-NEXT:    kmovd %edi, %k1
    143 ; SKX-NEXT:    vprold $1, %zmm0, %zmm1 {%k1}
    144 ; SKX-NEXT:    vprold $31, %zmm0, %zmm2 {%k1} {z}
    145 ; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
    146 ; SKX-NEXT:    vprold $30, %zmm0, %zmm0
    147 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    148 ; SKX-NEXT:    retq
          ; Amount 33, merge-masking (passthrough %x1, mask %x2).
    149   %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
          ; Amount -1, zero-masking (zeroinitializer passthrough, mask %x2).
    150   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
          ; Amount 65534, unmasked (mask -1).
    151   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
          ; Sum the three results so that every call contributes to the return value.
    152   %res3 = add <16 x i32> %res, %res1
    153   %res4 = add <16 x i32> %res3, %res2
    154   ret <16 x i32> %res4
    155 }
    156 
    ; Rotate-left of <8 x i64> %x0 by splat amounts that lie outside 0..63.
    ; The expected immediates are the amounts reduced modulo 64:
    ; 65534 becomes $62, 65 becomes $1 and -1 becomes $63.
    157 define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
    158 ; KNL-LABEL: test_splat_bounds_rol_v8i64:
    159 ; KNL:       # %bb.0:
    160 ; KNL-NEXT:    kmovw %edi, %k1
    161 ; KNL-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
    162 ; KNL-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
    163 ; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
    164 ; KNL-NEXT:    vprolq $63, %zmm0, %zmm0
    165 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    166 ; KNL-NEXT:    retq
    167 ;
    168 ; SKX-LABEL: test_splat_bounds_rol_v8i64:
    169 ; SKX:       # %bb.0:
    170 ; SKX-NEXT:    kmovd %edi, %k1
    171 ; SKX-NEXT:    vprolq $62, %zmm0, %zmm1 {%k1}
    172 ; SKX-NEXT:    vprolq $1, %zmm0, %zmm2 {%k1} {z}
    173 ; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
    174 ; SKX-NEXT:    vprolq $63, %zmm0, %zmm0
    175 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    176 ; SKX-NEXT:    retq
          ; Amount 65534, merge-masking (passthrough %x1, mask %x2).
    177   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
          ; Amount 65, zero-masking (zeroinitializer passthrough, mask %x2).
    178   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
          ; Amount -1, unmasked (mask -1).
    179   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
          ; Sum the three results so that every call contributes to the return value.
    180   %res3 = add <8 x i64> %res, %res1
    181   %res4 = add <8 x i64> %res3, %res2
    182   ret <8 x i64> %res4
    183 }
    184 
    ; Rotate-right of <16 x i32> %x0 by splat amounts that lie outside 0..31.
    ; The expected immediates are the amounts reduced modulo 32:
    ; 33 becomes $1, -1 becomes $31 and 65534 becomes $30.
    185 define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
    186 ; KNL-LABEL: test_splat_bounds_ror_v16i32:
    187 ; KNL:       # %bb.0:
    188 ; KNL-NEXT:    kmovw %edi, %k1
    189 ; KNL-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
    190 ; KNL-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
    191 ; KNL-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
    192 ; KNL-NEXT:    vprord $30, %zmm0, %zmm0
    193 ; KNL-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    194 ; KNL-NEXT:    retq
    195 ;
    196 ; SKX-LABEL: test_splat_bounds_ror_v16i32:
    197 ; SKX:       # %bb.0:
    198 ; SKX-NEXT:    kmovd %edi, %k1
    199 ; SKX-NEXT:    vprord $1, %zmm0, %zmm1 {%k1}
    200 ; SKX-NEXT:    vprord $31, %zmm0, %zmm2 {%k1} {z}
    201 ; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm1
    202 ; SKX-NEXT:    vprord $30, %zmm0, %zmm0
    203 ; SKX-NEXT:    vpaddd %zmm0, %zmm1, %zmm0
    204 ; SKX-NEXT:    retq
          ; Amount 33, merge-masking (passthrough %x1, mask %x2).
    205   %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
          ; Amount -1, zero-masking (zeroinitializer passthrough, mask %x2).
    206   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
          ; Amount 65534, unmasked (mask -1).
    207   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
          ; Sum the three results so that every call contributes to the return value.
    208   %res3 = add <16 x i32> %res, %res1
    209   %res4 = add <16 x i32> %res3, %res2
    210   ret <16 x i32> %res4
    211 }
    212 
    ; Rotate-right of <8 x i64> %x0 by splat amounts that lie outside 0..63.
    ; The expected immediates are the amounts reduced modulo 64:
    ; 65534 becomes $62, 65 becomes $1 and -1 becomes $63.
    213 define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
    214 ; KNL-LABEL: test_splat_bounds_ror_v8i64:
    215 ; KNL:       # %bb.0:
    216 ; KNL-NEXT:    kmovw %edi, %k1
    217 ; KNL-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
    218 ; KNL-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
    219 ; KNL-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
    220 ; KNL-NEXT:    vprorq $63, %zmm0, %zmm0
    221 ; KNL-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    222 ; KNL-NEXT:    retq
    223 ;
    224 ; SKX-LABEL: test_splat_bounds_ror_v8i64:
    225 ; SKX:       # %bb.0:
    226 ; SKX-NEXT:    kmovd %edi, %k1
    227 ; SKX-NEXT:    vprorq $62, %zmm0, %zmm1 {%k1}
    228 ; SKX-NEXT:    vprorq $1, %zmm0, %zmm2 {%k1} {z}
    229 ; SKX-NEXT:    vpaddq %zmm2, %zmm1, %zmm1
    230 ; SKX-NEXT:    vprorq $63, %zmm0, %zmm0
    231 ; SKX-NEXT:    vpaddq %zmm0, %zmm1, %zmm0
    232 ; SKX-NEXT:    retq
          ; Amount 65534, merge-masking (passthrough %x1, mask %x2).
    233   %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
          ; Amount 65, zero-masking (zeroinitializer passthrough, mask %x2).
    234   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
          ; Amount -1, unmasked (mask -1).
    235   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
          ; Sum the three results so that every call contributes to the return value.
    236   %res3 = add <8 x i64> %res, %res1
    237   %res4 = add <8 x i64> %res3, %res2
    238   ret <8 x i64> %res4
    239 }
    240 
    241 ; Constant folding
    242 ; We also test with a target shuffle so that the rotate can't be constant folded upon creation;
    243 ; it must wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
    244 
    ; Rotate-left with every operand constant: a splat of 1 rotated by the per-lane
    ; amounts 0, 1, 2, 63, 65, 65534, 65535 and -1, unmasked (mask -1).
    ; The expected output is a single constant-vector load and no rotate instruction.
    ; The expected lanes equal 1 rotated left by each amount modulo 64
    ; (for example, 65 gives 2 and 65534 gives 2^62 = 4611686018427387904).
    245 define <8 x i64> @test_fold_rol_v8i64() {
    246 ; CHECK-LABEL: test_fold_rol_v8i64:
    247 ; CHECK:       # %bb.0:
    248 ; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
    249 ; CHECK-NEXT:    retq
    250   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
    251   ret <8 x i64> %res
    252 }
    253 
    ; Rotate-left whose input comes from a vpermt2var call with all-constant operands,
    ; rotated by the per-lane amounts 0..15, unmasked (mask -1).
    ; The arguments %x0 and %x1 are not used by the body.
    ; In the recorded output the shuffle appears as a broadcast of 1, while the
    ; rotate is still emitted as vprolvd with a rip-relative memory operand.
    254 define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
    255 ; CHECK-LABEL: test_fold_rol_v16i32:
    256 ; CHECK:       # %bb.0:
    257 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    258 ; CHECK-NEXT:    vprolvd {{.*}}(%rip), %zmm0, %zmm0
    259 ; CHECK-NEXT:    retq
    260   %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
    261   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
    262   ret <16 x i32> %res1
    263 }
    264 
    ; Rotate-right whose input comes from a vpermt2var call with constant operands
    ; (the first operand has an undef element in lane 0), rotated by the per-lane
    ; amounts 0..7, unmasked (mask -1).
    ; In the recorded output the shuffle appears as a broadcast of 1, while the
    ; rotate is still emitted as vprorvq with a rip-relative memory operand.
    265 define <8 x i64> @test_fold_ror_v8i64() {
    266 ; CHECK-LABEL: test_fold_ror_v8i64:
    267 ; CHECK:       # %bb.0:
    268 ; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
    269 ; CHECK-NEXT:    vprorvq {{.*}}(%rip), %zmm0, %zmm0
    270 ; CHECK-NEXT:    retq
    271   %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
    272   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
    273   ret <8 x i64> %res1
    274 }
    275 
    ; Rotate-right whose input comes from a vpermt2var call with all-constant operands,
    ; rotated by the per-lane amounts 0..15, unmasked (mask -1).
    ; The arguments %x0 and %x1 are not used by the body.
    ; In the recorded output the shuffle appears as a broadcast of 1, while the
    ; rotate is still emitted as vprorvd with a rip-relative memory operand.
    276 define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
    277 ; CHECK-LABEL: test_fold_ror_v16i32:
    278 ; CHECK:       # %bb.0:
    279 ; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
    280 ; CHECK-NEXT:    vprorvd {{.*}}(%rip), %zmm0, %zmm0
    281 ; CHECK-NEXT:    retq
    282   %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
    283   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
    284   ret <16 x i32> %res1
    285 }
    286